예제 #1
0
  /**
   * Find association rules in a database, given the set of frequent itemsets.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) {
    min_support = minSupport;
    min_confidence = minConfidence;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read from cache supports of frequent itemsets
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
      // get a frequent itemset
      Itemset is_frequent = (Itemset) frequent.get(i);

      // skip it if it's too small
      if (is_frequent.size() <= 1) continue;

      // get all possible 1 item consequents
      Vector consequents = new Vector(is_frequent.size());
      for (int k = 0; k < is_frequent.size(); k++) {
        int item = is_frequent.getItem(k);
        Itemset is_consequent = new Itemset(1);
        is_consequent.addItem(item);

        // is_consequent now contains a possible consequent
        // verify now that the rule having this consequent
        // satisfies our requirements

        Itemset is_antecedent = is_frequent.subtract(is_consequent);
        float antecedent_support = (float) 0.00001;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
        }
        float confidence = is_frequent.getSupport() / antecedent_support;

        if (confidence >= min_confidence) {
          consequents.add(is_consequent);

          // we add the rule to our collection if it satisfies
          // our conditions
          rules.add(
              new AssociationRule(
                  is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        }
      }

      // call the ap_genrules procedure for generating all rules
      // out of this frequent itemset
      ap_genrules(is_frequent, consequents);
    }

    return rules;
  }
예제 #2
0
  // this is the ap-genrules procedure that generates rules out
  // of a frequent itemset.
  private void ap_genrules(Itemset is_frequent, Vector consequents) {
    if (consequents.size() == 0) return;

    // the size of frequent must be bigger than the size of the itemsets
    // in consequents by at least 2, in order to be able to generate
    // a rule in this call
    if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) {
      Vector new_consequents = apriori_gen(consequents);
      AssociationRule ar;

      for (int i = 0; i < new_consequents.size(); i++) {
        Itemset is_consequent = (Itemset) new_consequents.get(i);
        Itemset is_antecedent = is_frequent.subtract(is_consequent);
        float antecedent_support = (float) 0.00001;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
        }
        float confidence = is_frequent.getSupport() / antecedent_support;

        // if the rule satisfies our requirements we add it
        // to our collection
        if (confidence >= min_confidence)
          rules.add(
              new AssociationRule(
                  is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        // otherwise we remove the consequent from the collection
        // and we update the index such that we don't skip a consequent
        else new_consequents.remove(i--);
      }

      ap_genrules(is_frequent, new_consequents);
    }
  }
예제 #3
0
  // this is the ap-genrules procedure that generates rules out
  // of a frequent itemset.
  private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) {
    if (consequents.size() == 0) return;

    // the size of frequent must be bigger than the size of the itemsets
    // in consequents by at least 2, in order to be able to generate
    // a rule in this call
    if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) {
      Vector new_consequents = apriori_gen(consequents);
      AssociationRule ar;

      for (int i = 0; i < new_consequents.size(); i++) {
        Itemset is_consequent = (Itemset) new_consequents.get(i);
        Itemset is_antecedent = is_frequent.subtract(is_consequent);
        float antecedent_support = (float) 0.00001;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
        }
        float confidence = is_frequent.getSupport() / antecedent_support;

        // if the rule satisfies our confidence requirements
        if (confidence >= min_confidence) {
          // check whether it also satisfies our constraints
          boolean approved = true;

          if (approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent))
            approved = false;

          if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent))
            approved = false;

          if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent)
            approved = false;

          if (approved && min_consequent > 0 && is_consequent.size() < min_consequent)
            approved = false;

          // if the rule satisifes all requirements then
          // we add it to the rules collection
          if (approved)
            rules.add(
                new AssociationRule(
                    is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        }
        // otherwise we remove the consequent from the collection
        // and we update the index such that we don't skip a consequent
        else new_consequents.remove(i--);
      }

      ap_genrules_constraint(is_frequent, new_consequents);
    }
  }
예제 #4
0
  // this method stores all frequent itemsets that have support
  // greater than the minimum support in a SET for more efficient
  // access times.
  private void initializeSupports(DBCacheReader cacheReader) {
    // create new SET
    supports = new SET();

    try {
      Itemset is;
      while (true) {
        // get item from cache
        is = cacheReader.getNextItemset();
        // if item has support greater than the minimum support
        // required then we add it to the SET
        if (is.getSupport() >= min_support) {
          supports.insert(is);
        }
      }
    } catch (EOFException e) {
      // do nothing, we just reached the EOF
    } catch (IOException e) {
      System.err.println("Error scanning cache!!!\n" + e);
    } catch (ClassNotFoundException e) {
      System.err.println("Error scanning cache!!!\n" + e);
    }
  }
예제 #5
0
  /**
   * Find association rules in a database, given the set of frequent itemsets and a set of
   * restrictions.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @param inAntecedent the items that must appear in the antecedent of each rule, if null then
   *     this constraint is ignored
   * @param inConsequent the items that must appear in the consequent of each rule, if null then
   *     this constraint is ignored
   * @param ignored the items that should be ignored, if null then this constraint is ignored
   * @param maxAntecedent the maximum number of items that can appear in the antecedent of each
   *     rule, if 0 then this constraint is ignored
   * @param minConsequent the minimum number of items that should appear in the consequent of each
   *     rule, if 0 then this constraint is ignored
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(
      DBCacheReader cacheReader,
      float minSupport,
      float minConfidence,
      Itemset inAntecedent,
      Itemset inConsequent,
      Itemset ignored,
      int maxAntecedent,
      int minConsequent) {
    min_support = minSupport;
    min_confidence = minConfidence;

    is_in_antecedent = inAntecedent;
    is_in_consequent = inConsequent;
    is_ignored = ignored;
    max_antecedent = maxAntecedent;
    min_consequent = minConsequent;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read from cache supports of frequent itemsets
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();

    if (frequent.size() == 0) return rules;

    // if we need to ignore some items
    if (ignored != null) {
      // remove all frequent itemsets that contain
      // items to be ignored; their subsets that do
      // not contain those items will remain
      for (int i = 0; i < frequent.size(); i++) {
        Itemset is = (Itemset) frequent.get(i);
        if (is.doesIntersect(ignored)) {
          // replace this element with last, delete last,
          // and don't advance index
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        }
      }

      if (frequent.size() == 0) return rules;
    }

    // if we need to have some items in the antecedent or consequent
    if (inAntecedent != null || inConsequent != null) {
      // remove frequent itemsets that don't have the
      // required items
      for (int i = 0; i < frequent.size(); i++) {
        Itemset is = (Itemset) frequent.get(i);
        if (inAntecedent != null && !inAntecedent.isIncludedIn(is)) {
          // replace this element with last, delete last,
          // and don't advance index
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        } else if (inConsequent != null && !inConsequent.isIncludedIn(is)) {
          // replace this element with last, delete last,
          // and don't advance index
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        }
      }

      if (frequent.size() == 0) return rules;
    }

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
      // get a frequent itemset
      Itemset is_frequent = (Itemset) frequent.get(i);

      // skip it if it's too small
      if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) continue;

      // get all possible 1 item consequents
      Vector consequents = new Vector(is_frequent.size());
      for (int k = 0; k < is_frequent.size(); k++) {
        int item = is_frequent.getItem(k);
        Itemset is_consequent = new Itemset(1);
        is_consequent.addItem(item);

        // is_consequent now contains a possible consequent
        // verify now that the rule having this consequent
        // satisfies our requirements

        Itemset is_antecedent = is_frequent.subtract(is_consequent);
        float antecedent_support = (float) 0.00001;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
        }
        float confidence = is_frequent.getSupport() / antecedent_support;

        if (confidence >= min_confidence) {
          consequents.add(is_consequent);

          // check whether it also satisfies our constraints
          boolean approved = true;

          if (approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent))
            approved = false;

          if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent))
            approved = false;

          if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent)
            approved = false;

          if (approved && min_consequent > 0 && is_consequent.size() < min_consequent)
            approved = false;

          // if the rule satisifes all requirements then
          // we add it to the rules collection
          if (approved)
            rules.add(
                new AssociationRule(
                    is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        }
      }

      // call the ap-genrules procedure for generating all rules
      // out of this frequent itemset
      ap_genrules_constraint(is_frequent, consequents);
    }

    return rules;
  }