예제 #1
0
  /**
   * Derives association rules from the frequent itemsets whose supports are read from the cache.
   *
   * <p>The thresholds are stored in {@code min_support} and {@code min_confidence}, {@code rules}
   * is replaced by a new Vector, and the supports are loaded through {@code initializeSupports}.
   * For every frequent itemset with more than one item, each item is tried as a one-item
   * consequent; the consequents whose rule reaches the minimum confidence are then handed to
   * {@code ap_genrules}.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) {
    min_support = minSupport;
    min_confidence = minConfidence;
    rules = new Vector();

    // the supports of the frequent itemsets come from the cache
    initializeSupports(cacheReader);
    Vector frequentItemsets = supports.getItemsets();

    for (int f = 0; f < frequentItemsets.size(); f++) {
      Itemset whole = (Itemset) frequentItemsets.get(f);

      // an itemset must hold at least two items to be split into a rule
      if (whole.size() > 1) {
        Vector keptConsequents = new Vector(whole.size());

        // try every item of the itemset as a one-item consequent
        for (int pos = 0; pos < whole.size(); pos++) {
          int picked = whole.getItem(pos);
          Itemset head = new Itemset(1);
          head.addItem(picked);
          Itemset body = whole.subtract(head);

          // value used as the divisor when the lookup below fails
          float bodySupport = (float) 0.00001;
          try {
            bodySupport = supports.getSupport(body);
          } catch (SETException e) {
            System.err.println("Error geting support from SET!!!\n" + e);
          }

          float confidence = whole.getSupport() / bodySupport;
          if (confidence >= min_confidence) {
            // confident enough: keep the consequent and record the rule
            keptConsequents.add(head);
            rules.add(new AssociationRule(body, head, whole.getSupport(), confidence));
          }
        }

        // let ap_genrules build the rules with larger consequents
        ap_genrules(whole, keptConsequents);
      }
    }

    return rules;
  }
예제 #2
0
  // Constrained variant of the ap-genrules procedure: grows the given
  // consequents by one item through apriori_gen and records every rule of
  // is_frequent that reaches min_confidence and passes the configured
  // constraints (is_in_antecedent, is_in_consequent, max_antecedent,
  // min_consequent). Consequents that miss the confidence threshold are
  // dropped before recursing; consequents that only fail a constraint are kept.
  private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) {
    if (consequents.isEmpty()) {
      return;
    }

    // a rule can only be produced in this call when the frequent itemset is
    // at least two items larger than the itemsets in consequents
    boolean canGrow = is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1;
    if (!canGrow) {
      return;
    }

    Vector candidates = apriori_gen(consequents);
    Vector confident = new Vector();

    for (int c = 0; c < candidates.size(); c++) {
      Itemset head = (Itemset) candidates.get(c);
      Itemset body = is_frequent.subtract(head);

      // value used as the divisor when the lookup below fails
      float bodySupport = (float) 0.00001;
      try {
        bodySupport = supports.getSupport(body);
      } catch (SETException e) {
        System.err.println("Error geting support from SET!!!\n" + e);
      }

      float confidence = is_frequent.getSupport() / bodySupport;
      if (confidence >= min_confidence) {
        // the consequent survives for the next level whatever the constraints say
        confident.add(head);

        // constraints are evaluated left to right and stop at the first failure
        boolean approved =
            (is_in_antecedent == null || is_in_antecedent.isIncludedIn(body))
                && (is_in_consequent == null || is_in_consequent.isIncludedIn(head))
                && !(max_antecedent > 0 && body.size() > max_antecedent)
                && !(min_consequent > 0 && head.size() < min_consequent);

        if (approved) {
          rules.add(new AssociationRule(body, head, is_frequent.getSupport(), confidence));
        }
      }
    }

    ap_genrules_constraint(is_frequent, confident);
  }
예제 #3
0
  /**
   * Recursively extends the itemsets of {@code Lk} by one item.
   *
   * <p>For each itemset of {@code Lk}, every later itemset that {@code isCombinable} accepts
   * contributes its last item to a clone of the first one. Extensions whose first class support
   * reaches {@code minsup} are passed to {@code generateRules} and then to this method again.
   * Nothing is done when {@code Lk} has fewer than two itemsets, or when its first itemset
   * already has {@code nVariables} or {@code depth} items.
   *
   * @param Lk itemsets to extend
   * @param clas class value forwarded to {@code generateRules} and to the recursive call
   */
  private void generateLarge(ArrayList<Itemset> Lk, int clas) {
    int total = Lk.size();
    if (total <= 1) {
      return;
    }

    boolean canGrow =
        ((Lk.get(0)).size() < this.nVariables) && ((Lk.get(0)).size() < this.depth);
    if (!canGrow) {
      return;
    }

    // one list is reused for the extensions of every base itemset
    ArrayList<Itemset> extensions = new ArrayList<Itemset>();

    for (int first = 0; first < total - 1; first++) {
      Itemset base = Lk.get(first);

      for (int second = first + 1; second < total; second++) {
        Itemset other = Lk.get(second);
        if (!this.isCombinable(base, other)) {
          continue;
        }

        // base itemset plus the last item of the other one
        Itemset grown = base.clone();
        grown.add((other.get(other.size() - 1)).clone());
        grown.calculateSupports(this.dataBase, this.train);
        if (grown.getSupportClass()[0] >= this.minsup) {
          extensions.add(grown);
        }
      }

      // process the extensions of this base before moving to the next one
      this.generateRules(extensions, clas);
      this.generateLarge(extensions, clas);
      extensions.clear();
      System.gc();
    }
  }
예제 #4
0
  // The apriori_gen procedure: starting from a collection of k-itemsets it
  // builds the collection of (k+1)-itemsets obtained by combining pairs of
  // them, keeping a combination only when the number of its subsets found in
  // the input collection equals its size.
  private Vector apriori_gen(Vector itemsets) {
    if (itemsets.isEmpty()) {
      return new Vector(0);
    }

    // index the input in a hash tree so that subsets can be counted;
    // probably more than is needed for the small collections seen when
    // generating rules, but kept just in case
    HashTree ht_itemsets = new HashTree(itemsets);
    for (int idx = 0; idx < itemsets.size(); idx++) {
      ht_itemsets.add(idx);
    }
    ht_itemsets.prepareForDescent();

    Vector result = new Vector();
    for (int i = 0; i < itemsets.size() - 1; i++) {
      Itemset left = (Itemset) itemsets.get(i);

      for (int j = i + 1; j < itemsets.size(); j++) {
        Itemset right = (Itemset) itemsets.get(j);

        // the collection is kept ordered, so once one partner cannot be
        // combined none of the following ones can either
        if (!left.canCombineWith(right)) {
          break;
        }

        Itemset merged = left.combineWith(right);

        // a real k-itemset has k (k-1)-subsets; keep it only if all are present
        if (ht_itemsets.countSubsets(merged) == merged.size()) {
          result.add(merged);
        }
      }
    }

    return result;
  }
예제 #5
0
  // The ap-genrules procedure: grows the given consequents by one item
  // through apriori_gen, records every rule of is_frequent whose confidence
  // reaches min_confidence, and recurses with the consequents that did.
  private void ap_genrules(Itemset is_frequent, Vector consequents) {
    if (consequents.isEmpty()) {
      return;
    }

    // a rule can only be produced in this call when the frequent itemset is
    // at least two items larger than the itemsets in consequents
    boolean canGrow = is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1;
    if (!canGrow) {
      return;
    }

    Vector candidates = apriori_gen(consequents);
    Vector confident = new Vector();

    for (int c = 0; c < candidates.size(); c++) {
      Itemset head = (Itemset) candidates.get(c);
      Itemset body = is_frequent.subtract(head);

      // value used as the divisor when the lookup below fails
      float bodySupport = (float) 0.00001;
      try {
        bodySupport = supports.getSupport(body);
      } catch (SETException e) {
        System.err.println("Error geting support from SET!!!\n" + e);
      }

      float confidence = is_frequent.getSupport() / bodySupport;

      // only consequents that yield a confident rule survive to the next level
      if (confidence >= min_confidence) {
        confident.add(head);
        rules.add(new AssociationRule(body, head, is_frequent.getSupport(), confidence));
      }
    }

    ap_genrules(is_frequent, confident);
  }
예제 #6
0
  /**
   * Tells whether the last item of {@code itemsetj} can be appended to {@code itemseti}.
   *
   * <p>The two itemsets are combinable when the variable of the last item of {@code itemseti} is
   * strictly smaller than the variable of the last item of {@code itemsetj}. Each itemset is
   * indexed with its own size, so the last item of {@code itemsetj} is read even when the two
   * itemsets differ in length; for itemsets of equal length the result is the same as before.
   *
   * @param itemseti itemset that would be extended
   * @param itemsetj itemset whose last item would be appended
   * @return {@code true} if the last variable of {@code itemseti} is smaller than the last
   *     variable of {@code itemsetj}
   */
  private boolean isCombinable(Itemset itemseti, Itemset itemsetj) {
    Item lasti = itemseti.get(itemseti.size() - 1);
    Item lastj = itemsetj.get(itemsetj.size() - 1);

    return !(lasti.getVariable() >= lastj.getVariable());
  }
예제 #7
0
  /**
   * Builds a rule from an itemset and appends it to the rule base.
   *
   * <p>The antecedent has one slot per variable; slots of variables that do not occur in the
   * itemset hold -1 ("don't care"). The consequent is the class of the itemset, the confidence is
   * its class support divided by its support, and the rule support is its class support.
   *
   * @param itemset itemset to be added
   * @param time Time of the rule
   */
  public void add(Itemset itemset, long time) {
    // every variable starts as "don't care"
    int[] antecedent = new int[n_variables];
    java.util.Arrays.fill(antecedent, -1);

    // overwrite the slots of the variables present in the itemset
    for (int pos = 0; pos < itemset.size(); pos++) {
      Item current = itemset.get(pos);
      antecedent[current.getVariable()] = current.getValue();
    }

    Rule r = new Rule(this.dataBase);
    r.asignaAntecedente(antecedent);
    r.setConsequent(itemset.getClas());
    r.setConfidence(itemset.getSupportClass() / itemset.getSupport());
    r.setSupport(itemset.getSupportClass());
    r.setTime(time);

    this.ruleBase.add(r);
  }
예제 #8
0
파일: DBWriter.java 프로젝트: rybak/ML-2015
  /**
   * Appends one row to the database file. The column names must already have been written (see
   * setColumnNames()).
   *
   * <p>All items are validated before anything is written. The row is stored as its size followed
   * by its items, and the running CRC is updated with each written value.
   *
   * @param itemset the new row to be added to the data file
   * @exception IOException from library call
   * @exception DBException column names have not been set or an invalid item was contained in the
   *     itemset
   */
  public void addRow(Itemset itemset) throws IOException, DBException {
    if (!wroteColumnNames) {
      throw new DBException("Column names must be set first");
    }

    // reject the whole row up front if any item is larger than numColumns
    final int itemCount = itemset.size();
    for (int pos = 0; pos < itemCount; pos++) {
      if (itemset.get(pos) > numColumns) {
        throw new DBException("Attempt to write invalid item");
      }
    }

    // move back to the last recorded position when a reposition is pending
    if (needReposition) {
      outStream.seek(lastPosition);
      needReposition = false;
    }

    // row layout: size first, then each item
    outStream.writeInt(itemCount);
    CRC = updateCRC(CRC, itemCount);

    for (int pos = 0; pos < itemCount; pos++) {
      int value = itemset.get(pos);
      outStream.writeInt(value);
      CRC = updateCRC(CRC, value);
    }

    numRows++;
  }
예제 #9
0
  /**
   * It launches the algorithm.
   *
   * <p>When {@code somethingWrong} is set, only an error message is printed. Otherwise the
   * process is created and run, its report is printed, the fuzzy attributes and the genetic
   * learning log are saved, and the mined rules are written to two XML files: the rules file
   * (antecedents and consequents of each rule) and the values file (supports and confidence of
   * each rule).
   *
   * <p>Each rule is written as a {@code <rule>} start tag closed by a matching {@code </rule>}
   * end tag, so the rules file is well-formed XML. Both writers are closed even if writing
   * fails. A {@link FileNotFoundException} is reported with a stack trace and not rethrown.
   */
  public void execute() {
    if (somethingWrong) { // We do not execute the program
      System.err.println("An error was found");
      System.err.println("Aborting the program");
      // We should not use the statement: System.exit(-1);
      return;
    }

    this.proc =
        new AlcalaetalProcess(
            this.trans,
            this.nEvaluations,
            this.popSize,
            this.nBitsGene,
            this.phi,
            this.d,
            this.nFuzzyRegionsForNumericAttributes,
            this.useMaxForOneFrequentItemsets,
            this.minSupport,
            this.minConfidence);
    this.proc.run();
    this.associationRulesSet = this.proc.getRulesSet();
    this.proc.printReport(this.associationRulesSet);

    try {
      int r, i;
      AssociationRule ar;
      Itemset itemset;

      this.saveFuzzyAttributes(
          this.uniformFuzzyAttributesFilename, this.proc.getUniformFuzzyAttributes());
      this.saveFuzzyAttributes(
          this.adjustedFuzzyAttributesFilename, this.proc.getAdjustedFuzzyAttributes());
      this.saveGeneticLearningLog(
          this.geneticLearningLogFilename, this.proc.getGeneticLearningLog());

      // NOTE(review): the XML declarations below announce UTF-8, but PrintWriter(String)
      // encodes with the platform default charset - confirm the two agree where this runs.
      PrintWriter rules_writer = null;
      PrintWriter values_writer = null;

      try {
        rules_writer = new PrintWriter(this.rulesFilename);
        values_writer = new PrintWriter(this.valuesFilename);

        rules_writer.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
        rules_writer.println("<rules>");

        values_writer.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
        values_writer.print("<values ");
        values_writer.println(
            "n_one_frequent_itemsets=\""
                + this.proc.getNumberOfOneFrequentItemsets()
                + "\" n_rules=\""
                + this.associationRulesSet.size()
                + "\">");

        for (r = 0; r < this.associationRulesSet.size(); r++) {
          ar = this.associationRulesSet.get(r);

          // start tag, not an empty-element tag: the element is closed by </rule> below
          rules_writer.println("<rule id = \"" + r + "\">");
          values_writer.println(
              "<rule id=\""
                  + r
                  + "\" rule_support=\""
                  + ar.getRuleSupport()
                  + "\" antecedent_support=\""
                  + ar.getAntecedentSupport()
                  + "\" confidence=\""
                  + ar.getConfidence()
                  + "\"/>");

          rules_writer.println("<antecedents>");
          itemset = ar.getAntecedent();
          for (i = 0; i < itemset.size(); i++) {
            this.createRule(itemset.get(i), this.proc.getAdjustedFuzzyAttributes(), rules_writer);
          }
          rules_writer.println("</antecedents>");

          rules_writer.println("<consequents>");
          itemset = ar.getConsequent();
          for (i = 0; i < itemset.size(); i++) {
            this.createRule(itemset.get(i), this.proc.getAdjustedFuzzyAttributes(), rules_writer);
          }
          rules_writer.println("</consequents>");

          rules_writer.println("</rule>");
        }

        rules_writer.println("</rules>");
        values_writer.println("</values>");
      } finally {
        // release the file handles whether or not writing succeeded
        if (rules_writer != null) {
          rules_writer.close();
        }
        if (values_writer != null) {
          values_writer.close();
        }
      }

      System.out.println("\nAlgorithm Finished");
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    }
  }
예제 #10
0
  /**
   * Derives association rules from the frequent itemsets read from the cache, subject to a set
   * of restrictions.
   *
   * <p>Frequent itemsets that intersect {@code ignored}, or that do not include the required
   * antecedent or consequent items, are removed from the Vector returned by
   * {@code supports.getItemsets()} before any rule is generated. Each remaining itemset with more
   * than one item and more than {@code minConsequent} items is split into rules with one-item
   * consequents, and the consequents whose rule reaches the minimum confidence are handed to
   * {@code ap_genrules_constraint}.
   *
   * @param cacheReader the object used to read from the cache
   * @param minSupport the minimum support
   * @param minConfidence the minimum confidence
   * @param inAntecedent the items that must appear in the antecedent of each rule, if null then
   *     this constraint is ignored
   * @param inConsequent the items that must appear in the consequent of each rule, if null then
   *     this constraint is ignored
   * @param ignored the items that should be ignored, if null then this constraint is ignored
   * @param maxAntecedent the maximum number of items that can appear in the antecedent of each
   *     rule, if 0 then this constraint is ignored
   * @param minConsequent the minimum number of items that should appear in the consequent of each
   *     rule, if 0 then this constraint is ignored
   * @return a Vector containing all association rules found
   */
  public Vector findAssociations(
      DBCacheReader cacheReader,
      float minSupport,
      float minConfidence,
      Itemset inAntecedent,
      Itemset inConsequent,
      Itemset ignored,
      int maxAntecedent,
      int minConsequent) {
    // remember thresholds and constraints for the rule generation procedures
    min_support = minSupport;
    min_confidence = minConfidence;
    is_in_antecedent = inAntecedent;
    is_in_consequent = inConsequent;
    is_ignored = ignored;
    max_antecedent = maxAntecedent;
    min_consequent = minConsequent;

    rules = new Vector();

    // the supports of the frequent itemsets come from the cache
    initializeSupports(cacheReader);
    Vector frequent = supports.getItemsets();
    if (frequent.isEmpty()) {
      return rules;
    }

    if (ignored != null) {
      // drop every frequent itemset containing an ignored item; subsets
      // without those items stay in the collection
      int slot = 0;
      while (slot < frequent.size()) {
        Itemset candidate = (Itemset) frequent.get(slot);
        if (candidate.doesIntersect(ignored)) {
          // overwrite with the last element, drop the last element and
          // examine the same slot again
          frequent.set(slot, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
        } else {
          slot++;
        }
      }

      if (frequent.isEmpty()) {
        return rules;
      }
    }

    if (inAntecedent != null || inConsequent != null) {
      // drop every frequent itemset lacking the required items
      int slot = 0;
      while (slot < frequent.size()) {
        Itemset candidate = (Itemset) frequent.get(slot);
        boolean lacksRequired =
            (inAntecedent != null && !inAntecedent.isIncludedIn(candidate))
                || (inConsequent != null && !inConsequent.isIncludedIn(candidate));
        if (lacksRequired) {
          // same swap-with-last removal as above
          frequent.set(slot, frequent.lastElement());
          frequent.remove(frequent.size() - 1);
        } else {
          slot++;
        }
      }

      if (frequent.isEmpty()) {
        return rules;
      }
    }

    for (int f = 0; f < frequent.size(); f++) {
      Itemset whole = (Itemset) frequent.get(f);

      // too small to be split, or to hold a large enough consequent
      if (whole.size() <= 1 || whole.size() <= minConsequent) {
        continue;
      }

      // try every item of the itemset as a one-item consequent
      Vector keptConsequents = new Vector(whole.size());
      for (int pos = 0; pos < whole.size(); pos++) {
        int picked = whole.getItem(pos);
        Itemset head = new Itemset(1);
        head.addItem(picked);
        Itemset body = whole.subtract(head);

        // value used as the divisor when the lookup below fails
        float bodySupport = (float) 0.00001;
        try {
          bodySupport = supports.getSupport(body);
        } catch (SETException e) {
          System.err.println("Error geting support from SET!!!\n" + e);
        }

        float confidence = whole.getSupport() / bodySupport;
        if (confidence >= min_confidence) {
          // the consequent is kept for the next level whatever the constraints say
          keptConsequents.add(head);

          // constraints are evaluated left to right and stop at the first failure
          boolean approved =
              (is_in_antecedent == null || is_in_antecedent.isIncludedIn(body))
                  && (is_in_consequent == null || is_in_consequent.isIncludedIn(head))
                  && !(max_antecedent > 0 && body.size() > max_antecedent)
                  && !(min_consequent > 0 && head.size() < min_consequent);

          if (approved) {
            rules.add(new AssociationRule(body, head, whole.getSupport(), confidence));
          }
        }
      }

      // let ap_genrules_constraint build the rules with larger consequents
      ap_genrules_constraint(whole, keptConsequents);
    }

    return rules;
  }