Пример #1
  /**
   * Runs the FP-Growth algorithm.
   *
   * @param input the path to an input file containing a transaction database.
   * @param output the output file path for saving the result (if null, the result will be returned
   *     by the method instead of being saved).
   * @param minsupp the minimum support threshold.
   * @return the result if no output file path is provided.
   * @throws IOException if an error occurs while reading or writing files
   */
  public Itemsets runAlgorithm(String input, String output, double minsupp)
      throws FileNotFoundException, IOException {
    // NOTE(review): this excerpt is missing several statements and closing braces of the
    // method; the comments below describe only what is visible here.

    // record the start time of this run
    startTimestamp = System.currentTimeMillis();
    // reset the number of itemsets found
    itemsetCount = 0;

    // initialize tool to record memory usage
    // NOTE(review): no statement follows this comment in this excerpt.

    // if the user wants to keep the result in memory
    if (output == null) {
      writer = null;
      patterns = new Itemsets("FREQUENT ITEMSETS");
    } else { // if the user wants to save the result to a file
      patterns = null;
      writer = new BufferedWriter(new FileWriter(output));
      itemsetOutputBuffer = new int[BUFFERS_SIZE];

    // (1) PREPROCESSING: Initial database scan to determine the frequency of each item
    // The frequency is stored in a map:
    //    key: item   value: support
    final Map<Integer, Integer> mapSupport = scanDatabaseToDetermineFrequencyOfSingleItems(input);

    // Convert the minimum support, which is multiplied by the transaction count here
    // (so it is treated as a ratio), into a minimum number of transactions, rounded up.
    // Despite its name, minSupportRelative therefore holds a count.
    this.minSupportRelative = (int) Math.ceil(minsupp * transactionCount);

    // (2) Scan the database again to build the initial FP-Tree
    // Before inserting a transaction in the FPTree, we sort the items
    // by descending order of support.  We ignore items that
    // do not have the minimum support.
    FPTree tree = new FPTree();

    // read the file
    BufferedReader reader = new BufferedReader(new FileReader(input));
    String line;
    // for each line (transaction) until the end of the file
    while (((line = reader.readLine()) != null)) {
      // Detect lines that are empty, comments ('#') or metadata ('%', '@').
      // The isEmpty() test comes first, so charAt(0) is never evaluated on an empty line.
      // NOTE(review): the body of this branch is not present in this excerpt.
      if (line.isEmpty() == true
          || line.charAt(0) == '#'
          || line.charAt(0) == '%'
          || line.charAt(0) == '@') {

      // items on a line are separated by a single space
      String[] lineSplited = line.split(" ");
      //			Set<Integer> alreadySeen = new HashSet<Integer>();
      List<Integer> transaction = new ArrayList<Integer>();

      // for each item in the transaction
      for (String itemString : lineSplited) {
        Integer item = Integer.parseInt(itemString);
        // only add items that have the minimum support
        // NOTE(review): mapSupport.get(item) is auto-unboxed and would throw a
        // NullPointerException for an item without an entry; presumably the first scan
        // counted every item in the file -- confirm.
        if (mapSupport.get(item) >= minSupportRelative) {
      // sort the items of the transaction by descending order of support
      // NOTE(review): the call that receives this comparator is not present in this excerpt.
          new Comparator<Integer>() {
            public int compare(Integer item1, Integer item2) {
              // compare the frequency (higher support first)
              // NOTE(review): comparing by subtraction can overflow for values of large
              // magnitude; Integer.compare would avoid that.
              int compare = mapSupport.get(item2) - mapSupport.get(item1);
              // if the frequency is the same, order by ascending item value
              if (compare == 0) {
                return (item1 - item2);
              // otherwise, just use the frequency
              return compare;
      // add the sorted transaction to the fptree.
      // NOTE(review): the corresponding statement is not present in this excerpt.
    // close the input file
    // NOTE(review): no close() call on reader is visible in this excerpt.

    // We create the header table for the tree using the calculated support of single items
    // NOTE(review): the corresponding statement is not present in this excerpt.

    // (5) We start to mine the FP-Tree by calling the recursive method.
    // Initially, the prefix alpha is empty.
    // if at least one item is frequent
    if (tree.headerList.size() > 0) {
      // initialize the buffer for storing the current itemset
      itemsetBuffer = new int[BUFFERS_SIZE];
      // and another buffer
      itemsetTempBuffer = new int[BUFFERS_SIZE];
      // recursively generate frequent itemsets using the fp-tree,
      // starting with an empty prefix (length 0) whose support is the transaction count
      // Note: we assume that the initial FP-Tree has more than one path
      // which should generally be the case.
      fpgrowth(tree, itemsetBuffer, 0, transactionCount, mapSupport);

    // close the output file if the result was saved to a file
    // NOTE(review): the body of this branch is not present in this excerpt.
    if (writer != null) {
    // record the execution end time
    endTime = System.currentTimeMillis();

    // check the memory usage
    // NOTE(review): no statement follows this comment in this excerpt.

    // return the result (null when the result was written to a file instead)
    return patterns;
Пример #2
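  /**
   * Mine an FP-Tree having more than one path.
   *
   * @param tree the FP-tree
   * @param prefix the current prefix, named "alpha"
   * @param prefixLength the number of items currently stored in the prefix buffer
   * @param prefixSupport the support of the current prefix
   * @param mapSupport the frequency of items in the FP-Tree
   * @throws IOException if an error occurs while writing the output file
   */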
  private void fpgrowth(
      FPTree tree,
      int[] prefix,
      int prefixLength,
      int prefixSupport,
      Map<Integer, Integer> mapSupport)
      throws IOException {
    // NOTE(review): this excerpt is missing several statements and closing braces of the
    // method; the comments below describe only what is visible here.

    ////		======= DEBUG ========
    //		System.out.print("###### Prefix: ");
    //		for(int k=0; k< prefixLength; k++) {
    //			System.out.print(prefix[k] + "  ");
    //		}
    //		System.out.println("\n");
    ////				========== END DEBUG =======
    //		System.out.println(tree);

    // We will check if the FP-tree contains a single path
    boolean singlePath = true;
    // We will use a variable to keep the support of the single path if there is one
    int singlePathSupport = 0;
    // This variable is used to count the number of items in the single path
    // if there is one
    // NOTE(review): no statement updating position is visible in this excerpt.
    int position = 0;
    // if the root has more than one child, then it is not a single path
    if (tree.root.childs.size() > 1) {
      singlePath = false;
    } else {
      // Otherwise,
      // if the root has exactly one child, we need to walk down through the children
      // of that child to see if each of them also has at most one child
      // NOTE(review): this branch is also taken when the root has no child, in which case
      // get(0) presumably fails; the visible callers only invoke this method after checking
      // headerList.size() > 0 or root.childs.size() > 0 -- confirm that is sufficient.
      FPNode currentNode = tree.root.childs.get(0);
      while (true) {
        // if the current node has more than one child, it is not a single path!
        if (currentNode.childs.size() > 1) {
          singlePath = false;
        // otherwise, we copy the current item in the buffer and move to the child
        // the buffer will be used to store all items in the path
        itemsetTempBuffer[position] = currentNode.itemID;
        // we keep the support of the path (the counter of the most recently visited node)
        singlePathSupport = currentNode.counter;
        // if this node has no child, that means that this is the end of this path
        // and it is a single path, so we break
        if (currentNode.childs.size() == 0) {
        currentNode = currentNode.childs.get(0);

    // Case 1: the FP-tree contains a single path whose support reaches the threshold
    if (singlePath && singlePathSupport >= minSupportRelative) {
      // We save the path, because it is a maximal itemset
      // NOTE(review): the name of the method receiving these arguments is not present in
      // this excerpt.
          itemsetTempBuffer, position, prefix, prefixLength, singlePathSupport);
    } else {
      // Case 2: for each frequent item in the header table list of the tree, in reverse order.
      for (int i = tree.headerList.size() - 1; i >= 0; i--) {
        // get the item
        Integer item = tree.headerList.get(i);

        // get the item support
        int support = mapSupport.get(item);

        // Create Beta by concatenating the current item to the prefix Alpha
        // (written in place at the end of the shared prefix buffer)
        prefix[prefixLength] = item;

        // the support of the new prefix beta is the smaller of the prefix support
        // and the item support
        int betaSupport = (prefixSupport < support) ? prefixSupport : support;

        // save beta to the output file
        saveItemset(prefix, prefixLength + 1, betaSupport);

        // === (A) Construct beta's conditional pattern base ===
        // It is a subdatabase which consists of the set of prefix paths
        // in the FP-tree co-occurring with the prefix pattern.
        List<List<FPNode>> prefixPaths = new ArrayList<List<FPNode>>();
        // first node for this item; further nodes are reached through nodeLink below
        FPNode path = tree.mapItemNodes.get(item);

        // Map to count the support of items in the conditional prefix tree
        // Key: item   Value: support
        Map<Integer, Integer> mapSupportBeta = new HashMap<Integer, Integer>();

        while (path != null) {
          // if the path is not just the root node
          // (an itemID of -1 appears to mark the root -- confirm against FPNode/FPTree)
          if (path.parent.itemID != -1) {
            // create the prefix path
            List<FPNode> prefixPath = new ArrayList<FPNode>();
            // add this node.
            prefixPath.add(path); // NOTE: we add it just to keep its support,
            // actually it should not be part of the prefixPath

            // the counter of this node is the support contributed by this prefix path
            int pathCount = path.counter;

            // Walk up through all the ancestors of this node until the root.
            FPNode parent = path.parent;
            while (parent.itemID != -1) {

              // if this is the first time we see that item id
              if (mapSupportBeta.get(parent.itemID) == null) {
                // just add the path count
                mapSupportBeta.put(parent.itemID, pathCount);
              } else {
                // otherwise, add the path count to the value already stored
                mapSupportBeta.put(parent.itemID, mapSupportBeta.get(parent.itemID) + pathCount);
              parent = parent.parent;
            // add the path to the list of prefix paths
            // NOTE(review): the corresponding statement is not present in this excerpt.
          // We will look for the next prefix path
          path = path.nodeLink;

        // (B) Construct beta's conditional FP-Tree
        // Create the tree.
        FPTree treeBeta = new FPTree();
        // Add each prefix path to the FP-tree.
        for (List<FPNode> prefixPath : prefixPaths) {
          treeBeta.addPrefixPath(prefixPath, mapSupportBeta, minSupportRelative);

        // Mine the Beta tree recursively if its root has at least one child
        if (treeBeta.root.childs.size() > 0) {

          // Create the header list.
          // NOTE(review): the corresponding statement is not present in this excerpt.
          // recursive call with beta as the new prefix
          fpgrowth(treeBeta, prefix, prefixLength + 1, betaSupport, mapSupportBeta);