Ejemplo n.º 1
0
  /**
   * Calculate the support of an itemset by looking at the frequent patterns of the same size.
   * Because patterns are sorted by lexical order, we use a binary search. This is MUCH MORE
   * efficient than just browsing the full list of patterns. An alternative would be to use a trie
   * to store patterns but it may require a bit more memory.
   *
   * @param itemset the itemset.
   * @return the support of the itemset
   */
  private int calculateSupport(int[] itemset) {
    // We first get the list of patterns having the same size as "itemset"
    List<Itemset> patternsSameSize = patterns.getLevels().get(itemset.length);
    //
    // We perform a binary search to find the position of itemset in this list
    int first = 0;
    int last = patternsSameSize.size() - 1;

    while (first <= last) {
      int middle = (first + last) >> 1; // >>1 means to divide by 2
      int[] itemsetMiddle = patternsSameSize.get(middle).getItems();

      int comparison = ArraysAlgos.comparatorItemsetSameSize.compare(itemset, itemsetMiddle);
      if (comparison > 0) {
        first =
            middle
                + 1; //  the itemset compared is larger than the subset according to the lexical
                     // order
      } else if (comparison < 0) {
        last =
            middle
                - 1; //  the itemset compared is smaller than the subset  is smaller according to
                     // the lexical order
      } else {
        // we have found the itemset, so we return its support.
        return patternsSameSize.get(middle).getAbsoluteSupport();
      }
    }

    // If the itemset is not found in this list, it means that the itemset
    // is not closed, so we need to find the smallest superset (its closure) to
    // determine its support.
    // We start from itemset of length |itemset|+1 and increase the size of itemset after each
    // while loop.
    int size = itemset.length;
    loop:
    while (true) {
      size++;
      List<Itemset> patternsList = patterns.getLevels().get(size);
      // For each pattern of a given size
      for (Itemset pattern : patternsList) {
        int[] patternArray = pattern.getItems();

        // If the first item of the pattern is larger than the first item of the itemset,
        // we don't need to compare with following patterns.
        if (patternArray[0] > itemset[0]) {
          continue loop;
        }

        // Otherwise, we check if itemset is contained in pattern
        int posItemset = 0;
        int posPattern = 0;
        while (posPattern < patternArray.length) {
          if (patternArray[posPattern] == itemset[posItemset]) {
            posItemset++;
            // if it is contained completely
            if (posItemset == itemset.length) {
              return pattern.getAbsoluteSupport();
            }
          } else if (patternArray[posPattern] >= itemset[posItemset]) {
            // if the current item of pattern is larger than the current item in pattern,
            // then itemset cannot be contained in pattern so we stop considering it
            break;
          }
          posPattern++;
        }
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Run the algorithm for generating association rules from a set of itemsets.
   *
   * @param patterns the set of itemsets
   * @param output the output file path. If null the result is saved in memory and returned by the
   *     method.
   * @param databaseSize the number of transactions in the original database
   * @return the set of rules found if the user chose to save the result to memory
   * @throws IOException exception if error while writting to file
   */
  private AssocRules runAlgorithm(Itemsets patterns, String output, int databaseSize)
      throws IOException {

    // if the user want to keep the result into memory
    if (output == null) {
      writer = null;
      rules = new AssocRules("ASSOCIATION RULES");
    } else {
      // if the user want to save the result to a file
      rules = null;
      writer = new BufferedWriter(new FileWriter(output));
    }

    this.databaseSize = databaseSize;

    // record the time when the algorithm starts
    startTimestamp = System.currentTimeMillis();
    // initialize variable to count the number of rules found
    ruleCount = 0;
    // save itemsets in a member variable
    this.patterns = patterns;

    // SORTING
    // First, we sort all itemsets having the same size by lexical order
    // We do this for optimization purposes. If the itemsets are sorted, it allows to
    // perform two optimizations:
    // 1) When we need to calculate the support of an itemset (in the method
    // "calculateSupport()") we can use a binary search instead of browsing the whole list.
    // 2) When combining itemsets to generate candidate, we can use the
    //    lexical order to avoid comparisons (in the method "generateCandidates()").

    // For itemsets of the same size
    for (List<Itemset> itemsetsSameSize : patterns.getLevels()) {
      // Sort by lexicographical order using a Comparator
      Collections.sort(
          itemsetsSameSize,
          new Comparator<Itemset>() {
            @Override
            public int compare(Itemset o1, Itemset o2) {
              // The following code assume that itemsets are the same size
              return ArraysAlgos.comparatorItemsetSameSize.compare(o1.getItems(), o2.getItems());
            }
          });
    }
    // END OF SORTING

    // Now we will generate the rules.

    // For each frequent itemset of size >=2 that we will name "lk"
    for (int k = 2; k < patterns.getLevels().size(); k++) {
      for (Itemset lk : patterns.getLevels().get(k)) {

        // create a variable H1 for recursion
        List<int[]> H1_for_recursion = new ArrayList<int[]>();

        // For each itemset "itemsetSize1" of size 1 that is member of lk
        for (int item : lk.getItems()) {
          int itemsetHm_P_1[] = new int[] {item};

          // make a copy of  lk without items from  hm_P_1
          int[] itemset_Lk_minus_hm_P_1 = ArraysAlgos.cloneItemSetMinusOneItem(lk.getItems(), item);

          // Now we will calculate the support and confidence
          // of the rule: itemset_Lk_minus_hm_P_1 ==>  hm_P_1
          int support = calculateSupport(itemset_Lk_minus_hm_P_1); // THIS COULD BE
          // OPTIMIZED ?
          double supportAsDouble = (double) support;

          // calculate the confidence of the rule : itemset_Lk_minus_hm_P_1 ==>  hm_P_1
          double conf = lk.getAbsoluteSupport() / supportAsDouble;

          // if the confidence is lower than minconf
          if (conf < minconf || Double.isInfinite(conf)) {
            continue;
          }

          double lift = 0;
          int supportHm_P_1 = 0;
          // if the user is using the minlift threshold, we will need
          // to also calculate the lift of the rule:  itemset_Lk_minus_hm_P_1 ==>  hm_P_1
          if (usingLift) {
            // if we want to calculate the lift, we need the support of hm_P_1
            supportHm_P_1 =
                calculateSupport(
                    itemsetHm_P_1); // if we want to calculate the lift, we need to add this.
            // calculate the lift
            double term1 = ((double) lk.getAbsoluteSupport()) / databaseSize;
            double term2 = supportAsDouble / databaseSize;
            double term3 = ((double) supportHm_P_1 / databaseSize);
            lift = term1 / (term2 * term3);

            // if the lift is not enough
            if (lift < minlift) {
              continue;
            }
          }

          // If we are here, it means that the rule respect the minconf and minlift parameters.
          // Therefore, we output the rule.
          saveRule(
              itemset_Lk_minus_hm_P_1,
              support,
              itemsetHm_P_1,
              supportHm_P_1,
              lk.getAbsoluteSupport(),
              conf,
              lift);

          // Then we keep the itemset  hm_P_1 to find more rules using this itemset and lk.
          H1_for_recursion.add(itemsetHm_P_1);
          // ================ END OF WHAT I HAVE ADDED
        }

        // Finally, we make a recursive call to continue explores rules that can be made with "lk"
        apGenrules(k, 1, lk, H1_for_recursion);
      }
    }

    // close the file if we saved the result to a file
    if (writer != null) {
      writer.close();
    }
    // record the end time of the algorithm execution
    endTimeStamp = System.currentTimeMillis();

    // Return the rules found if the user chose to save the result to memory rather than a file.
    // Otherwise, null will be returned
    return rules;
  }