/** * Run the algorithm. * * @param input the file path of an input transaction database. * @param output the path of the desired output file * @param minsupp minimum support threshold as a percentage (double) * @throws IOException exception if error while writing the file */ public void runAlgorithm(String input, String output, double minsupp) throws FileNotFoundException, IOException { // record the start time startTimestamp = System.currentTimeMillis(); // reinitialize the number of itemsets found to 0 itemsetCount = 0; // Prepare the output file writer = new BufferedWriter(new FileWriter(output)); // (1) PREPROCESSING: Initial database scan to determine the frequency of each item // The frequency is store in a map where: // key: item value: support count final Map<String, Integer> mapSupport = new HashMap<String, Integer>(); // call this method to perform the database scan scanDatabaseToDetermineFrequencyOfSingleItems(input, mapSupport); // convert the absolute minimum support to a relative minimum support // by multiplying by the database size. this.relativeMinsupp = (int) Math.ceil(minsupp * transactionCount); // (2) Scan the database again to build the initial FP-Tree // Before inserting a transaction in the FPTree, we sort the items // by descending order of support. We ignore items that // do not have the minimum support. // create the FPTree FPTree_Strings tree = new FPTree_Strings(); BufferedReader reader = new BufferedReader(new FileReader(input)); String line; // for each line (transaction) in the input file until the end of file while (((line = reader.readLine()) != null)) { // if the line is a comment, is empty or is a // kind of metadata if (line.isEmpty() == true || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') { continue; } // split the transaction into items String[] lineSplited = line.split(" "); // create an array list to store the items List<String> transaction = new ArrayList<String>(); // for each item in the transaction for (String itemString : lineSplited) { // if it is frequent, add it to the transaction // otherwise not because it cannot be part of a frequent itemset. if (mapSupport.get(itemString) >= relativeMinsupp) { transaction.add(itemString); } } // sort item in the transaction by descending order of support Collections.sort( transaction, new Comparator<String>() { public int compare(String item1, String item2) { // compare the support int compare = mapSupport.get(item2) - mapSupport.get(item1); // if the same support, we check the lexical ordering! if (compare == 0) { return item1.compareTo(item2); } // otherwise use the support return compare; } }); // add the sorted transaction to the fptree. tree.addTransaction(transaction); } // close the input file reader.close(); // We create the header table for the tree tree.createHeaderList(mapSupport); // (5) We start to mine the FP-Tree by calling the recursive method. // Initially, the prefix alpha is empty. String[] prefixAlpha = new String[0]; fpgrowth(tree, prefixAlpha, transactionCount, mapSupport); // close the output file writer.close(); // record the end time endTime = System.currentTimeMillis(); // print(tree.root, " "); }
/** * Mine an FP-Tree having more than one path. * * @param tree the FP-tree * @param prefix the current prefix, named "alpha" * @param mapSupport the frequency of items in the FP-Tree * @throws IOException exception if error writing the output file */ private void fpgrowthMoreThanOnePath( FPTree_Strings tree, String[] prefixAlpha, int prefixSupport, Map<String, Integer> mapSupport) throws IOException { // We process each frequent item in the header table list of the tree in reverse order. for (int i = tree.headerList.size() - 1; i >= 0; i--) { String item = tree.headerList.get(i); int support = mapSupport.get(item); // if the item is not frequent, we skip it if (support < relativeMinsupp) { continue; } // Create Beta by concatening Alpha with the current item // and add it to the list of frequent patterns String[] beta = new String[prefixAlpha.length + 1]; System.arraycopy(prefixAlpha, 0, beta, 0, prefixAlpha.length); beta[prefixAlpha.length] = item; // calculate the support of beta int betaSupport = (prefixSupport < support) ? prefixSupport : support; // save beta to the output file writeItemsetToFile(beta, betaSupport); // === Construct beta's conditional pattern base === // It is a subdatabase which consists of the set of prefix paths // in the FP-tree co-occuring with the suffix pattern. List<List<FPNode_Strings>> prefixPaths = new ArrayList<List<FPNode_Strings>>(); FPNode_Strings path = tree.mapItemNodes.get(item); while (path != null) { // if the path is not just the root node if (path.parent.itemID != null) { // create the prefixpath List<FPNode_Strings> prefixPath = new ArrayList<FPNode_Strings>(); // add this node. prefixPath.add(path); // NOTE: we add it just to keep its support, // actually it should not be part of the prefixPath // Recursively add all the parents of this node. FPNode_Strings parent = path.parent; while (parent.itemID != null) { prefixPath.add(parent); parent = parent.parent; } // add the path to the list of prefixpaths prefixPaths.add(prefixPath); } // We will look for the next prefixpath path = path.nodeLink; } // (A) Calculate the frequency of each item in the prefixpath Map<String, Integer> mapSupportBeta = new HashMap<String, Integer>(); // for each prefixpath for (List<FPNode_Strings> prefixPath : prefixPaths) { // the support of the prefixpath is the support of its first node. int pathCount = prefixPath.get(0).counter; // for each node in the prefixpath, // except the first one, we count the frequency for (int j = 1; j < prefixPath.size(); j++) { FPNode_Strings node = prefixPath.get(j); // if the first time we see that node id if (mapSupportBeta.get(node.itemID) == null) { // just add the path count mapSupportBeta.put(node.itemID, pathCount); } else { // otherwise, make the sum with the value already stored mapSupportBeta.put(node.itemID, mapSupportBeta.get(node.itemID) + pathCount); } } } // (B) Construct beta's conditional FP-Tree FPTree_Strings treeBeta = new FPTree_Strings(); // add each prefixpath in the FP-tree for (List<FPNode_Strings> prefixPath : prefixPaths) { treeBeta.addPrefixPath(prefixPath, mapSupportBeta, relativeMinsupp); } // Create the header list. treeBeta.createHeaderList(mapSupportBeta); // Mine recursively the Beta tree if the root as child(s) if (treeBeta.root.childs.size() > 0) { // recursive call fpgrowth(treeBeta, beta, betaSupport, mapSupportBeta); } } }