예제 #1
0
  /**
   * Records one frequent itemset, either by appending it to the output file or by adding it to
   * the in-memory pattern collection when no writer is set.
   *
   * @param itemset the itemset to record
   * @throws IOException if writing to the output file fails
   */
  void saveItemset(Itemset itemset) throws IOException {
    // every itemset is counted, wherever it ends up
    itemsetCount++;

    // no writer: the result is kept in memory, registered together with its size
    if (writer == null) {
      patterns.addItemset(itemset, itemset.size());
      return;
    }

    // a writer is set: emit one line of the form "<itemset> #SUP: <support>"
    String line = itemset.toString() + " #SUP: " + itemset.getAbsoluteSupport();
    writer.write(line);
    writer.newLine();
  }
  /**
   * Example entry point: mines frequent itemsets from "contextIGB.txt" with FP-Growth, prints
   * them, then generates association rules from those itemsets and prints the rules with their
   * lift.
   *
   * @param arg command-line arguments (not used)
   * @throws FileNotFoundException declared by this example; propagated from the calls below
   * @throws IOException declared by this example; propagated from the calls below
   */
  public static void main(String[] arg) throws FileNotFoundException, IOException {
    // Phase 1 - frequent itemsets with FP-Growth.
    // A null output path is passed, and the itemsets come back as the return value.
    String inputPath = fileToPath("contextIGB.txt");
    double minSupport = 0.5;
    AlgoFPGrowth miner = new AlgoFPGrowth();
    Itemsets frequentItemsets = miner.runAlgorithm(inputPath, null, minSupport);
    int transactionCount = miner.getDatabaseSize();
    frequentItemsets.printItemsets(transactionCount);

    // Phase 2 - association rules from the frequent itemsets (Agrawal & Srikant, 94).
    // Again a null output path: the rules are returned instead of being written to a file.
    double minLift = 0;
    double minConfidence = 0.90;
    AlgoAgrawalFaster94 ruleGenerator = new AlgoAgrawalFaster94();
    AssocRules rules =
        ruleGenerator.runAlgorithm(
            frequentItemsets, null, transactionCount, minConfidence, minLift);
    rules.printRulesWithLift(transactionCount);
  }
예제 #3
0
  /**
   * Records a frequent itemset made of a single item, either by appending it to the output file
   * or by adding it to the in-memory pattern collection when no writer is set.
   *
   * @param item the single item of the itemset
   * @param support the support to record for that item
   * @throws IOException if writing to the output file fails
   */
  void saveItemsetToFile(Integer item, Integer support) throws IOException {
    // every itemset is counted, wherever it ends up
    itemsetCount++;

    // no writer: wrap the item in an Itemset and keep it in memory with size 1
    if (writer == null) {
      Itemset singleton = new Itemset(item);
      singleton.setAbsoluteSupport(support);
      patterns.addItemset(singleton, 1);
      return;
    }

    // a writer is set: emit one line of the form "<item> #SUP: <support>"
    writer.write(item + " #SUP: " + support);
    writer.newLine();
  }
예제 #4
0
  /**
   * Records a frequent itemset given as the first {@code itemsetLength} entries of an array. The
   * items are sorted before being recorded; the caller's array is never modified.
   *
   * <p>When a writer is set, one line of the form {@code "i1 i2 ... in #SUP: support"} is
   * written. Otherwise an {@code Itemset} object is created and added to the in-memory patterns.
   *
   * @param itemset array whose first {@code itemsetLength} entries are the items
   * @param itemsetLength number of valid entries in {@code itemset}
   * @param support the support of the itemset
   * @throws IOException if writing to the output file fails
   */
  private void saveItemset(int[] itemset, int itemsetLength, int support) throws IOException {

    // statistics: one more itemset found
    itemsetCount++;

    // In-memory mode: store a sorted private copy of the items as an Itemset.
    if (writer == null) {
      int[] sortedItems = new int[itemsetLength];
      System.arraycopy(itemset, 0, sortedItems, 0, itemsetLength);
      // ascending order, so the itemset is shown to the user in sorted order
      Arrays.sort(sortedItems);

      Itemset found = new Itemset(sortedItems);
      found.setAbsoluteSupport(support);
      patterns.addItemset(found, itemsetLength);
      return;
    }

    // File mode: sort inside the shared output buffer so the caller's array stays untouched.
    System.arraycopy(itemset, 0, itemsetOutputBuffer, 0, itemsetLength);
    Arrays.sort(itemsetOutputBuffer, 0, itemsetLength);

    // Items separated by single spaces, followed by the support.
    StringBuilder line = new StringBuilder();
    for (int pos = 0; pos < itemsetLength; pos++) {
      if (pos > 0) {
        line.append(' ');
      }
      line.append(itemsetOutputBuffer[pos]);
    }
    line.append(" #SUP: ").append(support);

    writer.write(line.toString());
    writer.newLine();
  }
  /**
   * Calculate the support of an itemset by looking at the stored patterns of the same size.
   *
   * <p>The lookup is a binary search using {@code ArraysAlgos.comparatorItemsetSameSize}, so the
   * list of patterns of that size must already be sorted with that comparator. This is much more
   * efficient than browsing the full list of patterns.
   *
   * @param itemset the itemset.
   * @return the absolute support of the matching pattern, or 0 if no stored pattern matches
   */
  private int calculateSupport(int[] itemset) {
    // We first get the list of patterns having the same size as "itemset"
    List<Itemset> patternsSameSize = patterns.getLevels().get(itemset.length);

    // We perform a binary search to find the position of itemset in this list
    int first = 0;
    int last = patternsSameSize.size() - 1;

    while (first <= last) {
      // Unsigned shift: the midpoint stays non-negative even if first + last overflows int,
      // which the signed form ">> 1" would turn into a negative index.
      int middle = (first + last) >>> 1;
      Itemset candidate = patternsSameSize.get(middle);
      int[] itemsetMiddle = candidate.getItems();

      int comparison = ArraysAlgos.comparatorItemsetSameSize.compare(itemset, itemsetMiddle);
      if (comparison > 0) {
        // the searched itemset comes after the middle one: continue in the upper half
        first = middle + 1;
      } else if (comparison < 0) {
        // the searched itemset comes before the middle one: continue in the lower half
        last = middle - 1;
      } else {
        // we have found the itemset, so we return its support.
        return candidate.getAbsoluteSupport();
      }
    }
    // Not found. In the context of this algorithm the searched itemsets are expected to be
    // frequent and therefore present, so this is only a fallback; 0 signals "no such pattern".
    return 0;
  }
  /**
   * Run the algorithm for generating association rules from a set of itemsets.
   *
   * <p>For every itemset of size 2 or more, the rules with a single-item consequent are evaluated
   * here. The itemset is then passed to {@code apGenrules} together with the consequents that
   * passed the thresholds. The thresholds {@code minconf} and {@code minlift} and the flag {@code
   * usingLift} are not parameters: they are read from the enclosing class's state and must be set
   * before this method is called.
   *
   * <p>Side effects: each level of {@code patterns} is sorted in place, and the start/end
   * timestamps and rule counter of this object are reset. When an output file is used, the writer
   * is closed before this method returns, whether it completes normally or with an exception.
   *
   * @param patterns the set of itemsets
   * @param output the output file path. If null the result is saved in memory and returned by the
   *     method.
   * @param databaseSize the number of transactions in the original database
   * @return the set of rules found if the user chose to save the result to memory, otherwise null
   * @throws IOException exception if error while writing to file
   */
  private AssocRules runAlgorithm(Itemsets patterns, String output, int databaseSize)
      throws IOException {

    // if the user wants to keep the result into memory
    if (output == null) {
      writer = null;
      rules = new AssocRules("ASSOCIATION RULES");
    } else {
      // if the user wants to save the result to a file
      rules = null;
      writer = new BufferedWriter(new FileWriter(output));
    }

    // From here on the writer may be open: the finally block guarantees it is closed (and its
    // buffer flushed) even when sorting or rule generation throws.
    try {
      this.databaseSize = databaseSize;

      // record the time when the algorithm starts
      startTimestamp = System.currentTimeMillis();
      // initialize variable to count the number of rules found
      ruleCount = 0;
      // save itemsets in a member variable
      this.patterns = patterns;

      // SORTING
      // First, we sort all itemsets having the same size by lexical order.
      // We do this for optimization purposes. If the itemsets are sorted, it allows to
      // perform two optimizations:
      // 1) When we need to calculate the support of an itemset (in the method
      //    "calculateSupport()") we can use a binary search instead of browsing the whole list.
      // 2) When combining itemsets to generate candidates, we can use the
      //    lexical order to avoid comparisons (in the method "generateCandidates()").
      for (List<Itemset> itemsetsSameSize : patterns.getLevels()) {
        Collections.sort(
            itemsetsSameSize,
            new Comparator<Itemset>() {
              @Override
              public int compare(Itemset o1, Itemset o2) {
                // The following code assumes that itemsets are the same size
                return ArraysAlgos.comparatorItemsetSameSize.compare(
                    o1.getItems(), o2.getItems());
              }
            });
      }
      // END OF SORTING

      // Now we will generate the rules.

      // For each frequent itemset of size >= 2 that we will name "lk"
      for (int k = 2; k < patterns.getLevels().size(); k++) {
        for (Itemset lk : patterns.getLevels().get(k)) {

          // consequents of size 1 that produced a valid rule; they seed the recursive step
          List<int[]> H1_for_recursion = new ArrayList<int[]>();

          // For each item of lk, used as a consequent of size 1
          for (int item : lk.getItems()) {
            int[] itemsetHm_P_1 = new int[] {item};

            // make a copy of lk without the item of hm_P_1 (the antecedent of the rule)
            int[] itemset_Lk_minus_hm_P_1 =
                ArraysAlgos.cloneItemSetMinusOneItem(lk.getItems(), item);

            // Now we will calculate the support and confidence
            // of the rule: itemset_Lk_minus_hm_P_1 ==> hm_P_1
            int support = calculateSupport(itemset_Lk_minus_hm_P_1);
            double supportAsDouble = (double) support;

            // confidence of the rule: support(lk) / support(antecedent)
            double conf = lk.getAbsoluteSupport() / supportAsDouble;

            // Skip the rule if the confidence is lower than minconf, or infinite (which happens
            // when the antecedent support is 0 while lk has a positive support).
            if (conf < minconf || Double.isInfinite(conf)) {
              continue;
            }

            double lift = 0;
            int supportHm_P_1 = 0;
            // if the user is using the minlift threshold, we will need
            // to also calculate the lift of the rule: itemset_Lk_minus_hm_P_1 ==> hm_P_1
            if (usingLift) {
              // to calculate the lift, we need the support of hm_P_1
              supportHm_P_1 = calculateSupport(itemsetHm_P_1);

              // lift = P(lk) / (P(antecedent) * P(consequent)), each relative to databaseSize
              double term1 = ((double) lk.getAbsoluteSupport()) / databaseSize;
              double term2 = supportAsDouble / databaseSize;
              double term3 = ((double) supportHm_P_1 / databaseSize);
              lift = term1 / (term2 * term3);

              // if the lift is not enough
              if (lift < minlift) {
                continue;
              }
            }

            // If we are here, the rule respects the minconf and minlift parameters.
            // Therefore, we output the rule.
            saveRule(
                itemset_Lk_minus_hm_P_1,
                support,
                itemsetHm_P_1,
                supportHm_P_1,
                lk.getAbsoluteSupport(),
                conf,
                lift);

            // Then we keep the itemset hm_P_1 to find more rules using this itemset and lk.
            H1_for_recursion.add(itemsetHm_P_1);
          }
          // Finally, we make a recursive call to continue exploring rules that can be made
          // with "lk"
          apGenrules(k, 1, lk, H1_for_recursion);
        }
      }
    } finally {
      // close the file if we saved the result to a file
      if (writer != null) {
        writer.close();
      }
    }
    // record the end time of the algorithm execution
    endTimeStamp = System.currentTimeMillis();

    // Return the rules found if the user chose to save the result to memory rather than a file.
    // Otherwise, null will be returned
    return rules;
  }