Exemplo n.º 1
0
  /**
   * Find association rules in a database, given the set of frequent itemsets.
   *
   * <p>For every frequent itemset holding more than one item, each single item is tried as a
   * consequent. A rule is recorded when its confidence (support of the whole itemset divided by the
   * support of the antecedent) reaches {@code minConfidence}. The accepted one-item consequents are
   * then passed to {@code ap_genrules} together with the itemset.
   *
   * <p>If the support of an antecedent cannot be looked up, the error is reported on {@code
   * System.err} and that candidate rule is skipped, so no rule is ever emitted with a confidence
   * computed from a placeholder support value.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) {
    min_support = minSupport;
    min_confidence = minConfidence;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read from cache supports of frequent itemsets
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
      Itemset is_frequent = (Itemset) frequent.get(i);

      // an itemset needs at least two items to be split into
      // a non-empty antecedent and a consequent
      if (is_frequent.size() <= 1) {
        continue;
      }

      // collect all 1 item consequents that yield a confident rule
      Vector consequents = new Vector(is_frequent.size());
      for (int k = 0; k < is_frequent.size(); k++) {
        Itemset is_consequent = new Itemset(1);
        is_consequent.addItem(is_frequent.getItem(k));

        // the antecedent is whatever remains of the itemset
        Itemset is_antecedent = is_frequent.subtract(is_consequent);

        float antecedent_support;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
          // without the real antecedent support any confidence would be
          // fabricated, so this candidate cannot be evaluated
          continue;
        }

        float confidence = is_frequent.getSupport() / antecedent_support;
        if (confidence >= min_confidence) {
          consequents.add(is_consequent);

          // the rule satisfies our conditions, add it to the collection
          rules.add(
              new AssociationRule(
                  is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
        }
      }

      // call the ap_genrules procedure for generating all rules
      // out of this frequent itemset
      ap_genrules(is_frequent, consequents);
    }

    return rules;
  }
Exemplo n.º 2
0
  /**
   * Builds a sequence from its tokenized textual form and registers it.
   *
   * <p>Tokens are processed in order:
   *
   * <ul>
   *   <li>a token starting with {@code '<'} carries a timestamp: the text between its first and
   *       last character is parsed as a {@code long} and set on the itemset being built;
   *   <li>{@code "-1"} closes the current itemset, appends it to the sequence and starts a new
   *       itemset whose timestamp is the closed one's plus one;
   *   <li>{@code "-2"} adds the sequence to {@code sequences};
   *   <li>any other token is parsed as an integer and resolved to an item through {@code
   *       itemFactory}; the item is added to the current itemset, and on its first occurrence
   *       within this call the sequence's id bit is set in the item's {@code frequentItems} entry.
   * </ul>
   *
   * <p>Items that follow the last {@code "-1"} are never appended to the sequence, and the
   * sequence is only stored when a {@code "-2"} token is encountered.
   *
   * @param integers the tokens describing the sequence
   * @param sequenceID the identifier assigned to the new sequence
   */
  public void addSequence(String[] integers, int sequenceID) {
    Sequence sequence = new Sequence(sequences.size());
    sequence.setID(sequenceID);

    Itemset currentItemset = new Itemset();
    // items already met in this sequence; only Boolean.TRUE is ever stored
    Map<Item, Boolean> seenItems = new HashMap<Item, Boolean>();

    for (String token : integers) {
      // timestamp marker
      if (token.codePointAt(0) == '<') {
        String inner = token.substring(1, token.length() - 1);
        currentItemset.setTimestamp(Long.parseLong(inner));
        continue;
      }

      // end of an itemset
      if (token.equals("-1")) {
        long nextTimestamp = currentItemset.getTimestamp() + 1;
        sequence.addItemset(currentItemset);
        currentItemset = new Itemset();
        currentItemset.setTimestamp(nextTimestamp);
        continue;
      }

      // end of the sequence
      if (token.equals("-2")) {
        sequences.add(sequence);
        continue;
      }

      // anything else is an item identifier
      Item item = itemFactory.getItem(Integer.parseInt(token));
      if (!seenItems.containsKey(item)) {
        seenItems.put(item, Boolean.TRUE);
        BitSet appearances = frequentItems.get(item);
        if (appearances == null) {
          appearances = new BitSet();
          frequentItems.put(item, appearances);
        }
        // record that this sequence contains the item
        appearances.set(sequence.getId());
      }
      currentItemset.addItem(item);
    }
  }
Exemplo n.º 3
0
  /**
   * Find association rules in a database, given the set of frequent itemsets and a set of
   * restrictions.
   *
   * <p>The frequent itemsets are first pruned: itemsets intersecting {@code ignored} are dropped,
   * as are itemsets that do not contain every item of {@code inAntecedent} and of {@code
   * inConsequent}. For each remaining itemset, every single item is tried as a consequent; a
   * candidate whose confidence reaches {@code minConfidence} is kept as a consequent for {@code
   * ap_genrules_constraint}, and is additionally recorded as a rule when it satisfies all of the
   * constraints.
   *
   * <p>If the support of an antecedent cannot be looked up, the error is reported on {@code
   * System.err} and that candidate is skipped, so no rule is ever emitted with a confidence
   * computed from a placeholder support value.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @param inAntecedent the items that must appear in the antecedent of each rule, if null then
   *     this constraint is ignored
   * @param inConsequent the items that must appear in the consequent of each rule, if null then
   *     this constraint is ignored
   * @param ignored the items that should be ignored, if null then this constraint is ignored
   * @param maxAntecedent the maximum number of items that can appear in the antecedent of each
   *     rule, if 0 then this constraint is ignored
   * @param minConsequent the minimum number of items that should appear in the consequent of each
   *     rule, if 0 then this constraint is ignored
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(
      DBCacheReader cacheReader,
      float minSupport,
      float minConfidence,
      Itemset inAntecedent,
      Itemset inConsequent,
      Itemset ignored,
      int maxAntecedent,
      int minConsequent) {
    min_support = minSupport;
    min_confidence = minConfidence;

    is_in_antecedent = inAntecedent;
    is_in_consequent = inConsequent;
    is_ignored = ignored;
    max_antecedent = maxAntecedent;
    min_consequent = minConsequent;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read from cache supports of frequent itemsets
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();

    if (frequent.isEmpty()) {
      return rules;
    }

    // if we need to ignore some items
    if (ignored != null) {
      // remove all frequent itemsets that contain
      // items to be ignored; their subsets that do
      // not contain those items will remain
      for (int i = 0; i < frequent.size(); i++) {
        Itemset is = (Itemset) frequent.get(i);
        if (is.doesIntersect(ignored)) {
          // replace this element with last, delete last,
          // and don't advance index (element order is not preserved)
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        }
      }

      if (frequent.isEmpty()) {
        return rules;
      }
    }

    // if we need to have some items in the antecedent or consequent
    if (inAntecedent != null || inConsequent != null) {
      // remove frequent itemsets that don't have the
      // required items
      for (int i = 0; i < frequent.size(); i++) {
        Itemset is = (Itemset) frequent.get(i);
        boolean lacksRequiredItems =
            (inAntecedent != null && !inAntecedent.isIncludedIn(is))
                || (inConsequent != null && !inConsequent.isIncludedIn(is));
        if (lacksRequiredItems) {
          // replace this element with last, delete last,
          // and don't advance index (element order is not preserved)
          frequent.set(i, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
          i--;
        }
      }

      if (frequent.isEmpty()) {
        return rules;
      }
    }

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
      Itemset is_frequent = (Itemset) frequent.get(i);

      // skip it if it's too small to leave a non-empty antecedent
      // next to a consequent of the required size
      if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) {
        continue;
      }

      // collect all 1 item consequents that yield a confident rule
      Vector consequents = new Vector(is_frequent.size());
      for (int k = 0; k < is_frequent.size(); k++) {
        Itemset is_consequent = new Itemset(1);
        is_consequent.addItem(is_frequent.getItem(k));

        // the antecedent is whatever remains of the itemset
        Itemset is_antecedent = is_frequent.subtract(is_consequent);

        float antecedent_support;
        try {
          antecedent_support = supports.getSupport(is_antecedent);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
          // without the real antecedent support any confidence would be
          // fabricated, so this candidate cannot be evaluated
          continue;
        }

        float confidence = is_frequent.getSupport() / antecedent_support;
        if (confidence >= min_confidence) {
          // the consequent is kept for ap_genrules_constraint even when
          // the 1 item rule itself fails the constraints below
          consequents.add(is_consequent);

          // check whether it also satisfies our constraints
          boolean approved =
              (is_in_antecedent == null || is_in_antecedent.isIncludedIn(is_antecedent))
                  && (is_in_consequent == null || is_in_consequent.isIncludedIn(is_consequent))
                  && (max_antecedent <= 0 || is_antecedent.size() <= max_antecedent)
                  && (min_consequent <= 0 || is_consequent.size() >= min_consequent);

          // if the rule satisfies all requirements then
          // we add it to the rules collection
          if (approved) {
            rules.add(
                new AssociationRule(
                    is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
          }
        }
      }

      // call the ap-genrules procedure for generating all rules
      // out of this frequent itemset
      ap_genrules_constraint(is_frequent, consequents);
    }

    return rules;
  }