/**
 * Runs the FP-Growth algorithm on a transaction database.
 *
 * <p>The input file is read twice: a first scan counts the support of every single item,
 * and a second scan inserts each transaction (restricted to frequent items, sorted by
 * descending support) into the initial FP-tree, which is then mined recursively.
 *
 * <p>The output writer and the input reader are always closed, even if reading, parsing
 * or mining fails.
 *
 * @param input the path to an input file containing a transaction database (one
 *     transaction per line, items separated by single spaces; lines that are empty or
 *     start with '#', '%' or '@' are skipped).
 * @param output the output file path for saving the result (if null, the result is kept
 *     in memory and returned by the method instead of being saved).
 * @param minsupp the minimum support threshold, as a fraction of the number of
 *     transactions; it is multiplied by the transaction count and rounded up to obtain
 *     the absolute threshold.
 * @return the frequent itemsets if no output file path is provided, otherwise null.
 * @throws FileNotFoundException if the input file cannot be opened
 * @throws IOException exception if error reading or writing files
 */
public Itemsets runAlgorithm(String input, String output, double minsupp)
        throws FileNotFoundException, IOException {
    // record start time
    startTimestamp = System.currentTimeMillis();
    // number of itemsets found
    itemsetCount = 0;

    // initialize tool to record memory usage
    MemoryLogger.getInstance().reset();
    MemoryLogger.getInstance().checkMemory();

    if (output == null) {
        // the user wants to keep the result in memory
        writer = null;
        patterns = new Itemsets("FREQUENT ITEMSETS");
    } else {
        // the user wants to save the result to a file
        patterns = null;
        writer = new BufferedWriter(new FileWriter(output));
        itemsetOutputBuffer = new int[BUFFERS_SIZE];
    }

    try {
        // (1) PREPROCESSING: initial database scan to determine the support of each item.
        // key: item, value: support
        final Map<Integer, Integer> mapSupport =
                scanDatabaseToDetermineFrequencyOfSingleItems(input);

        // Convert the fractional minimum support into an absolute transaction count.
        // NOTE(review): transactionCount is presumably populated by the scan above.
        this.minSupportRelative = (int) Math.ceil(minsupp * transactionCount);

        // (2) Second scan: build the initial FP-tree from the frequent items only.
        FPTree tree = buildInitialTree(input, mapSupport);

        // Create the header table for the tree using the support of single items.
        tree.createHeaderList(mapSupport);

        // (3) Mine the FP-tree recursively, starting with an empty prefix,
        // but only if at least one item is frequent.
        if (tree.headerList.size() > 0) {
            // buffer for storing the current itemset
            itemsetBuffer = new int[BUFFERS_SIZE];
            // scratch buffer used when a tree consists of a single path
            itemsetTempBuffer = new int[BUFFERS_SIZE];
            fpgrowth(tree, itemsetBuffer, 0, transactionCount, mapSupport);
        }
    } finally {
        // close the output file if the result was saved to a file,
        // including when an exception interrupted the run
        if (writer != null) {
            writer.close();
        }
    }

    // record the execution end time
    endTime = System.currentTimeMillis();
    // check the memory usage
    MemoryLogger.getInstance().checkMemory();

    // return the result (null if it was written to a file)
    return patterns;
}

/**
 * Reads the transaction database and inserts every transaction into a new FP-tree.
 * Items whose support is below the minimum support are dropped, and the remaining items
 * of each transaction are sorted by descending support (ties broken by ascending item).
 *
 * @param input the path to the transaction database
 * @param mapSupport the support of each single item (key: item, value: support)
 * @return the FP-tree containing all transactions
 * @throws IOException if the input file cannot be opened or read
 */
private FPTree buildInitialTree(String input, final Map<Integer, Integer> mapSupport)
        throws IOException {
    // Created once and reused for every transaction. compareTo is used instead of
    // subtraction so that the ordering cannot be corrupted by integer overflow.
    final Comparator<Integer> bySupportDescending = new Comparator<Integer>() {
        @Override
        public int compare(Integer item1, Integer item2) {
            // higher support first
            int bySupport = mapSupport.get(item2).compareTo(mapSupport.get(item1));
            if (bySupport != 0) {
                return bySupport;
            }
            // same support: fall back to ascending item order
            return item1.compareTo(item2);
        }
    };

    FPTree tree = new FPTree();
    BufferedReader reader = new BufferedReader(new FileReader(input));
    try {
        String line;
        // for each line (transaction) until the end of the file
        while ((line = reader.readLine()) != null) {
            // skip empty lines, comments and metadata
            if (line.isEmpty()
                    || line.charAt(0) == '#'
                    || line.charAt(0) == '%'
                    || line.charAt(0) == '@') {
                continue;
            }

            List<Integer> transaction = new ArrayList<Integer>();
            for (String itemString : line.split(" ")) {
                Integer item = Integer.parseInt(itemString);
                // only keep items that reach the minimum support; an item with no
                // recorded support cannot be frequent, so it is skipped as well
                Integer support = mapSupport.get(item);
                if (support != null && support >= minSupportRelative) {
                    transaction.add(item);
                }
            }

            Collections.sort(transaction, bySupportDescending);
            tree.addTransaction(transaction);
        }
    } finally {
        // close the input file even if a line could not be read or parsed
        reader.close();
    }
    return tree;
}
/** * Mine an FP-Tree having more than one path. * * @param tree the FP-tree * @param prefix the current prefix, named "alpha" * @param mapSupport the frequency of items in the FP-Tree * @throws IOException exception if error writing the output file */ private void fpgrowth( FPTree tree, int[] prefix, int prefixLength, int prefixSupport, Map<Integer, Integer> mapSupport) throws IOException { //// ======= DEBUG ======== // System.out.print("###### Prefix: "); // for(int k=0; k< prefixLength; k++) { // System.out.print(prefix[k] + " "); // } // System.out.println("\n"); //// ========== END DEBUG ======= // System.out.println(tree); // We will check if the FPtree contains a single path boolean singlePath = true; // We will use a variable to keep the support of the single path if there is one int singlePathSupport = 0; // This variable is used to count the number of items in the single path // if there is one int position = 0; // if the root has more than one child, than it is not a single path if (tree.root.childs.size() > 1) { singlePath = false; } else { // Otherwise, // if the root has exactly one child, we need to recursively check childs // of the child to see if they also have one child FPNode currentNode = tree.root.childs.get(0); while (true) { // if the current child has more than one child, it is not a single path! 
if (currentNode.childs.size() > 1) { singlePath = false; break; } // otherwise, we copy the current item in the buffer and move to the child // the buffer will be used to store all items in the path itemsetTempBuffer[position] = currentNode.itemID; // we keep the support of the path singlePathSupport = currentNode.counter; position++; // if this node has no child, that means that this is the end of this path // and it is a single path, so we break if (currentNode.childs.size() == 0) { break; } currentNode = currentNode.childs.get(0); } } // Case 1: the FPtree contains a single path if (singlePath && singlePathSupport >= minSupportRelative) { // We save the path, because it is a maximal itemset saveAllCombinationsOfPrefixPath( itemsetTempBuffer, position, prefix, prefixLength, singlePathSupport); } else { // For each frequent item in the header table list of the tree in reverse order. for (int i = tree.headerList.size() - 1; i >= 0; i--) { // get the item Integer item = tree.headerList.get(i); // get the item support int support = mapSupport.get(item); // Create Beta by concatening prefix Alpha by adding the current item to alpha prefix[prefixLength] = item; // calculate the support of the new prefix beta int betaSupport = (prefixSupport < support) ? prefixSupport : support; // save beta to the output file saveItemset(prefix, prefixLength + 1, betaSupport); // === (A) Construct beta's conditional pattern base === // It is a subdatabase which consists of the set of prefix paths // in the FP-tree co-occuring with the prefix pattern. 
List<List<FPNode>> prefixPaths = new ArrayList<List<FPNode>>(); FPNode path = tree.mapItemNodes.get(item); // Map to count the support of items in the conditional prefix tree // Key: item Value: support Map<Integer, Integer> mapSupportBeta = new HashMap<Integer, Integer>(); while (path != null) { // if the path is not just the root node if (path.parent.itemID != -1) { // create the prefixpath List<FPNode> prefixPath = new ArrayList<FPNode>(); // add this node. prefixPath.add(path); // NOTE: we add it just to keep its support, // actually it should not be part of the prefixPath // #### int pathCount = path.counter; // Recursively add all the parents of this node. FPNode parent = path.parent; while (parent.itemID != -1) { prefixPath.add(parent); // FOR EACH PATTERN WE ALSO UPDATE THE ITEM SUPPORT AT THE SAME TIME // if the first time we see that node id if (mapSupportBeta.get(parent.itemID) == null) { // just add the path count mapSupportBeta.put(parent.itemID, pathCount); } else { // otherwise, make the sum with the value already stored mapSupportBeta.put(parent.itemID, mapSupportBeta.get(parent.itemID) + pathCount); } parent = parent.parent; } // add the path to the list of prefixpaths prefixPaths.add(prefixPath); } // We will look for the next prefixpath path = path.nodeLink; } // (B) Construct beta's conditional FP-Tree // Create the tree. FPTree treeBeta = new FPTree(); // Add each prefixpath in the FP-tree. for (List<FPNode> prefixPath : prefixPaths) { treeBeta.addPrefixPath(prefixPath, mapSupportBeta, minSupportRelative); } // Mine recursively the Beta tree if the root has child(s) if (treeBeta.root.childs.size() > 0) { // Create the header list. treeBeta.createHeaderList(mapSupportBeta); // recursive call fpgrowth(treeBeta, prefix, prefixLength + 1, betaSupport, mapSupportBeta); } } } }