/** * Run the algorithm. * * @param minSupport Minsup as a percentage (ex: 0.05 = 5 %) * @param minConfidence minimum confidence (a value between 0 and 1). * @param input the input file path * @param output the output file path * @param windowSize a window size * @throws IOException exception if there is an error reading/writing files */ public void runAlgorithm( double minSupport, double minConfidence, String input, String output, int windowSize) throws IOException { // load the input file into memory try { this.database = new SequenceDatabase(); database.loadFile(input); } catch (Exception e) { e.printStackTrace(); } // convert minimum support to a relative minimum support (integer) this.minsuppRelative = (int) Math.ceil(minSupport * database.size()); // run the algorithm runAlgorithm(input, output, minsuppRelative, minConfidence, windowSize); }
/** * Run the algorithm. * * @param relativeMinSupport minsup as a a relative value (integer) * @param minConfidence minimum confidence (a value between 0 and 1). * @param input the input file path * @param output the output file path * @param windowSize a window size * @throws IOException exception if there is an error reading/writing files */ public void runAlgorithm( String input, String output, int relativeMinSupport, double minConfidence, int windowSize) throws IOException { this.minconf = minConfidence; // read the database into memory if (database == null) { try { this.database = new SequenceDatabase(); database.loadFile(input); } catch (Exception e) { e.printStackTrace(); } } // IMPORTANT : THIS IS A FIX SO THAT THE DEFINITION IS THE SAME AS IN THE ARTICLE!! this.windowSize = windowSize + 1; // if minsup is 0, set it to 1 this.minsuppRelative = relativeMinSupport; if (this.minsuppRelative == 0) { // protection this.minsuppRelative = 1; } // reset the stats for memory usage MemoryLogger.getInstance().reset(); // prepare the object for writing the output file writer = new BufferedWriter(new FileWriter(output)); // save the start time timeStart = System.currentTimeMillis(); // for stats // remove infrequent items from the database removeItemsThatAreNotFrequent(database); // note frequent items in a list "listFrequents" List<String> listFrequents = new ArrayList<String>(); // for each item for (Entry<String, Map<Integer, Occurence>> entry : mapItemCount.entrySet()) { // if it is frequent if (entry.getValue().size() >= minsuppRelative) { // add the item to the list listFrequents.add(entry.getKey()); } } // FOR EACH FREQUENT ITEM WE COMPARE WITH EACH OTHER FREQUENT ITEM TO // TRY TO GENERATE A RULE 1-1. for (int i = 0; i < listFrequents.size(); i++) { String intI = listFrequents.get(i); Map<Integer, Occurence> occurencesI = mapItemCount.get(intI); for (int j = i + 1; j < listFrequents.size(); j++) { String intJ = listFrequents.get(j); Map<Integer, Occurence> occurencesJ = mapItemCount.get(intJ); // (1) Calculate tidsI, tidsJ, tidsJ-->J and tidsI->J Set<Integer> tidsI = new HashSet<Integer>(); Set<Integer> tidsJ = null; Set<Integer> tidsIJ = new HashSet<Integer>(); Set<Integer> tidsJI = new HashSet<Integer>(); // for each occurence of I looptid: for (Occurence occI : occurencesI.values()) { // add the sequenceID to tidsI tidsI.add(occI.sequenceID); // if J does not appear in that sequence continue loop Occurence occJ = occurencesJ.get(occI.sequenceID); if (occJ == null) { continue looptid; } // make a big loop to compare if I appears before // J in that sequence and // if J appears before I boolean addedIJ = false; boolean addedJI = false; // for each occurence of I in that sequence loopIJ: for (Short posI : occI.occurences) { // for each occurence of J in that sequence for (Short posJ : occJ.occurences) { if (!posI.equals(posJ) && Math.abs(posI - posJ) <= windowSize) { if (posI <= posJ) { // if I is before J tidsIJ.add(occI.sequenceID); addedIJ = true; } else { // if J is before I tidsJI.add(occI.sequenceID); addedJI = true; } // if we have found that I is before J and J is before I // we don't need to continue. if (addedIJ && addedJI) { break loopIJ; } } } } } // END // (2) check if the two itemsets have enough common tids // if not, we don't need to generate a rule for them. // create rule IJ if (tidsIJ.size() >= minsuppRelative) { // calculate the confidence of I ==> J double confIJ = ((double) tidsIJ.size()) / occurencesI.size(); // create itemset of the rule I ==> J String[] itemset1 = new String[] {intI}; String[] itemset2 = new String[] {intJ}; // if the confidence is high enough, save the rule if (confIJ >= minConfidence) { saveRule(tidsIJ, confIJ, itemset1, itemset2); } // Calculate tidsJ. tidsJ = new HashSet<Integer>(); for (Occurence occJ : occurencesJ.values()) { tidsJ.add(occJ.sequenceID); } // recursive call to try to expand the rule expandLeft(itemset1, itemset2, tidsI, tidsIJ); expandRight(itemset1, itemset2, tidsI, tidsJ, tidsIJ); } // create rule JI if (tidsJI.size() >= minsuppRelative) { double confJI = ((double) tidsJI.size()) / occurencesJ.size(); // create itemsets for that rule String[] itemset1 = new String[] {intI}; String[] itemset2 = new String[] {intJ}; // if the rule has enough confidence, save it! if (confJI >= minConfidence) { saveRule(tidsJI, confJI, itemset2, itemset1); // rules.addRule(ruleJI); } // Calculate tidsJ. if (tidsJ == null) { tidsJ = new HashSet<Integer>(); for (Occurence occJ : occurencesJ.values()) { tidsJ.add(occJ.sequenceID); } } // recursive call to try to expand the rule expandRight(itemset2, itemset1, tidsJ, tidsI, tidsJI /*, occurencesJ, occurencesI*/); expandLeft(itemset2, itemset1, tidsJ, tidsJI /*, occurencesI*/); } } } // save the end time for the execution of the algorithm timeEnd = System.currentTimeMillis(); // for stats // close the file writer.close(); database = null; }