void saveItemset(Itemset itemset) throws IOException { itemsetCount++; // if the result should be saved to a file if (writer != null) { writer.write(itemset.toString() + " #SUP: " + itemset.getAbsoluteSupport()); writer.newLine(); } // otherwise the result is kept into memory else { patterns.addItemset(itemset, itemset.size()); } }
public static void main(String[] arg) throws FileNotFoundException, IOException { // Loading the binary context String input = fileToPath("contextIGB.txt"); // STEP 1: Applying the FP-GROWTH algorithm to find frequent itemsets double minsupp = 0.5; AlgoFPGrowth fpgrowth = new AlgoFPGrowth(); Itemsets patterns = fpgrowth.runAlgorithm(input, null, minsupp); int databaseSize = fpgrowth.getDatabaseSize(); patterns.printItemsets(databaseSize); // STEP 2: Generating all rules from the set of frequent itemsets (based on Agrawal & Srikant, // 94) double minlift = 0; double minconf = 0.90; AlgoAgrawalFaster94 algoAgrawal = new AlgoAgrawalFaster94(); // the next line run the algorithm. // Note: we pass null as output file path, because we don't want // to save the result to a file, but keep it into memory. AssocRules rules = algoAgrawal.runAlgorithm(patterns, null, databaseSize, minconf, minlift); rules.printRulesWithLift(databaseSize); }
void saveItemsetToFile(Integer item, Integer support) throws IOException { itemsetCount++; // if the result should be saved to a file if (writer != null) { writer.write(item + " #SUP: " + support); writer.newLine(); } // otherwise the result is kept into memory else { Itemset itemset = new Itemset(item); itemset.setAbsoluteSupport(support); patterns.addItemset(itemset, 1); } }
/** * Write a frequent itemset that is found to the output file or keep into memory if the user * prefer that the result be saved into memory. */ private void saveItemset(int[] itemset, int itemsetLength, int support) throws IOException { // increase the number of itemsets found for statistics purpose itemsetCount++; // if the result should be saved to a file if (writer != null) { // copy the itemset in the output buffer and sort items System.arraycopy(itemset, 0, itemsetOutputBuffer, 0, itemsetLength); Arrays.sort(itemsetOutputBuffer, 0, itemsetLength); // Create a string buffer StringBuilder buffer = new StringBuilder(); // write the items of the itemset for (int i = 0; i < itemsetLength; i++) { buffer.append(itemsetOutputBuffer[i]); if (i != itemsetLength - 1) { buffer.append(' '); } } // Then, write the support buffer.append(" #SUP: "); buffer.append(support); // write to file and create a new line writer.write(buffer.toString()); writer.newLine(); } // otherwise the result is kept into memory else { // create an object Itemset and add it to the set of patterns // found. int[] itemsetArray = new int[itemsetLength]; System.arraycopy(itemset, 0, itemsetArray, 0, itemsetLength); // sort the itemset so that it is sorted according to lexical ordering before we show it to // the user Arrays.sort(itemsetArray); Itemset itemsetObj = new Itemset(itemsetArray); itemsetObj.setAbsoluteSupport(support); patterns.addItemset(itemsetObj, itemsetLength); } }
/** * Calculate the support of an itemset by looking at the frequent patterns of the same size. * Because patterns are sorted by lexical order, we use a binary search. This is MUCH MORE * efficient than just browsing the full list of patterns. * * @param itemset the itemset. * @return the support of the itemset */ private int calculateSupport(int[] itemset) { // We first get the list of patterns having the same size as "itemset" List<Itemset> patternsSameSize = patterns.getLevels().get(itemset.length); // // We perform a binary search to find the position of itemset in this list int first = 0; int last = patternsSameSize.size() - 1; while (first <= last) { int middle = (first + last) >> 1; // >>1 means to divide by 2 int[] itemsetMiddle = patternsSameSize.get(middle).getItems(); int comparison = ArraysAlgos.comparatorItemsetSameSize.compare(itemset, itemsetMiddle); if (comparison > 0) { first = middle + 1; // the itemset compared is larger than the subset according to the lexical // order } else if (comparison < 0) { last = middle - 1; // the itemset compared is smaller than the subset is smaller according to // the lexical order } else { // we have found the itemset, so we return its support. return patternsSameSize.get(middle).getAbsoluteSupport(); } } // The following line will not happen because in the context of this algorithm, we will // always search for itemsets that are frequent and thus will be in the list of patterns. // We just put the following line to avoid compilation error and detect if the error if this // case was ever to happen. return 0; // throw new RuntimeException("INVALID SUPPORT - THIS SHOULD NOT HAPPEN BECAUSE ALL // ITEMSETS HAVE TO BE FREQUENT"); }
/**
 * Run the algorithm for generating association rules from a set of itemsets.
 *
 * @param patterns the set of frequent itemsets, grouped by size
 * @param output the output file path. If null the result is saved in memory and returned by the
 *     method.
 * @param databaseSize the number of transactions in the original database
 * @return the set of rules found if the user chose to save the result to memory, otherwise null
 * @throws IOException exception if error while writting to file
 */
private AssocRules runAlgorithm(Itemsets patterns, String output, int databaseSize)
    throws IOException {
  // if the user wants to keep the result in memory
  if (output == null) {
    writer = null;
    rules = new AssocRules("ASSOCIATION RULES");
  } else {
    // the user wants to save the result to a file
    rules = null;
    writer = new BufferedWriter(new FileWriter(output));
  }

  this.databaseSize = databaseSize;

  // record the time when the algorithm starts
  startTimestamp = System.currentTimeMillis();
  // initialize the rule counter used for statistics
  ruleCount = 0;
  // save the itemsets in a member variable so helper methods can reach them
  this.patterns = patterns;

  // SORTING
  // First, we sort all itemsets having the same size by lexical order.
  // Sorting enables two optimizations:
  // 1) calculateSupport() can use a binary search instead of browsing the
  //    whole list of patterns of a given size;
  // 2) when combining itemsets to generate candidates, the lexical order
  //    avoids comparisons (in the method "generateCandidates()").

  // For itemsets of the same size
  for (List<Itemset> itemsetsSameSize : patterns.getLevels()) {
    // Sort by lexicographical order using a Comparator
    Collections.sort(
        itemsetsSameSize,
        new Comparator<Itemset>() {
          @Override
          public int compare(Itemset o1, Itemset o2) {
            // The following code assumes that both itemsets have the same size
            return ArraysAlgos.comparatorItemsetSameSize.compare(o1.getItems(), o2.getItems());
          }
        });
  }
  // END OF SORTING

  // Now we will generate the rules.
  // For each frequent itemset of size >= 2 that we will name "lk"
  for (int k = 2; k < patterns.getLevels().size(); k++) {
    for (Itemset lk : patterns.getLevels().get(k)) {
      // create a variable H1 for the recursion
      List<int[]> H1_for_recursion = new ArrayList<int[]>();

      // For each item of lk, try the rule with that single item as consequent
      for (int item : lk.getItems()) {
        // hm_P_1 is the size-1 consequent {item}
        int itemsetHm_P_1[] = new int[] {item};

        // antecedent: a copy of lk without the item of hm_P_1
        int[] itemset_Lk_minus_hm_P_1 = ArraysAlgos.cloneItemSetMinusOneItem(lk.getItems(), item);

        // support of the antecedent of the rule
        // itemset_Lk_minus_hm_P_1 ==> hm_P_1 (THIS COULD BE OPTIMIZED?)
        int support = calculateSupport(itemset_Lk_minus_hm_P_1);
        double supportAsDouble = (double) support;

        // confidence of the rule: itemset_Lk_minus_hm_P_1 ==> hm_P_1
        double conf = lk.getAbsoluteSupport() / supportAsDouble;

        // skip the rule if the confidence is below minconf; the isInfinite
        // test guards against a division by an antecedent support of zero
        if (conf < minconf || Double.isInfinite(conf)) {
          continue;
        }

        double lift = 0;
        int supportHm_P_1 = 0;
        // if the user is using the minlift threshold, we also need the lift
        // of the rule: itemset_Lk_minus_hm_P_1 ==> hm_P_1
        if (usingLift) {
          // the lift requires the support of the consequent hm_P_1
          supportHm_P_1 = calculateSupport(itemsetHm_P_1);

          // lift = P(lk) / (P(antecedent) * P(consequent)), with each
          // probability estimated as support / databaseSize
          double term1 = ((double) lk.getAbsoluteSupport()) / databaseSize;
          double term2 = supportAsDouble / databaseSize;
          double term3 = ((double) supportHm_P_1 / databaseSize);
          lift = term1 / (term2 * term3);

          // skip the rule if the lift is not high enough
          if (lift < minlift) {
            continue;
          }
        }

        // The rule respects the minconf and minlift parameters, so output it.
        saveRule(
            itemset_Lk_minus_hm_P_1,
            support,
            itemsetHm_P_1,
            supportHm_P_1,
            lk.getAbsoluteSupport(),
            conf,
            lift);

        // keep hm_P_1 to find more rules using this itemset and lk
        H1_for_recursion.add(itemsetHm_P_1);
      }

      // recursive call to continue exploring rules that can be made with "lk"
      apGenrules(k, 1, lk, H1_for_recursion);
    }
  }

  // close the file if we saved the result to a file
  if (writer != null) {
    writer.close();
  }
  // record the end time of the algorithm execution
  endTimeStamp = System.currentTimeMillis();

  // Return the rules found if the user chose to save the result to memory
  // rather than a file. Otherwise, null will be returned.
  return rules;
}