/** * Find association rules in a database, given the set of frequent itemsets. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @return a Vector containing all association rules found */ public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) { min_support = minSupport; min_confidence = minConfidence; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset) frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // we add the rule to our collection if it satisfies // our conditions rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap_genrules procedure for generating all rules // out of this frequent itemset ap_genrules(is_frequent, consequents); } return rules; }
// this is the ap-genrules procedure that generates rules out // of a frequent itemset. private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) { if (consequents.size() == 0) return; // the size of frequent must be bigger than the size of the itemsets // in consequents by at least 2, in order to be able to generate // a rule in this call if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) { Vector new_consequents = apriori_gen(consequents); AssociationRule ar; for (int i = 0; i < new_consequents.size(); i++) { Itemset is_consequent = (Itemset) new_consequents.get(i); Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; // if the rule satisfies our confidence requirements if (confidence >= min_confidence) { // check whether it also satisfies our constraints boolean approved = true; if (approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false; if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent)) approved = false; if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent) approved = false; if (approved && min_consequent > 0 && is_consequent.size() < min_consequent) approved = false; // if the rule satisifes all requirements then // we add it to the rules collection if (approved) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } // otherwise we remove the consequent from the collection // and we update the index such that we don't skip a consequent else new_consequents.remove(i--); } ap_genrules_constraint(is_frequent, new_consequents); } }
private void generateLarge(ArrayList<Itemset> Lk, int clas) { int i, j, size; ArrayList<Itemset> Lnew; Itemset newItemset, itemseti, itemsetj; size = Lk.size(); if (size > 1) { if (((Lk.get(0)).size() < this.nVariables) && ((Lk.get(0)).size() < this.depth)) { Lnew = new ArrayList<Itemset>(); for (i = 0; i < size - 1; i++) { itemseti = Lk.get(i); for (j = i + 1; j < size; j++) { itemsetj = Lk.get(j); if (this.isCombinable(itemseti, itemsetj)) { newItemset = itemseti.clone(); newItemset.add((itemsetj.get(itemsetj.size() - 1)).clone()); newItemset.calculateSupports(this.dataBase, this.train); if (newItemset.getSupportClass()[0] >= this.minsup) Lnew.add(newItemset); } } this.generateRules(Lnew, clas); this.generateLarge(Lnew, clas); Lnew.clear(); System.gc(); } } } }
// this is the apriori_gen procedure that generates starting from // a k-itemset collection a new collection of (k+1)-itemsets. private Vector apriori_gen(Vector itemsets) { if (itemsets.size() == 0) return new Vector(0); // create a hashtree so that we can check more efficiently the // number of subsets // this may not really be necessary when generating rules since // itemsets will probably be a small collection, but just in case HashTree ht_itemsets = new HashTree(itemsets); for (int i = 0; i < itemsets.size(); i++) ht_itemsets.add(i); ht_itemsets.prepareForDescent(); Vector result = new Vector(); Itemset is_i, is_j; for (int i = 0; i < itemsets.size() - 1; i++) for (int j = i + 1; j < itemsets.size(); j++) { is_i = (Itemset) itemsets.get(i); is_j = (Itemset) itemsets.get(j); // if we cannot combine element i with j then we shouldn't // waste time for bigger j's. This is because we keep the // collections ordered, an important detail in this implementation if (!is_i.canCombineWith(is_j)) break; else { Itemset is = is_i.combineWith(is_j); // a real k-itemset has k (k-1)-subsets // so we test that this holds before adding to result if (ht_itemsets.countSubsets(is) == is.size()) result.add(is); } } return result; }
// this is the ap-genrules procedure that generates rules out // of a frequent itemset. private void ap_genrules(Itemset is_frequent, Vector consequents) { if (consequents.size() == 0) return; // the size of frequent must be bigger than the size of the itemsets // in consequents by at least 2, in order to be able to generate // a rule in this call if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) { Vector new_consequents = apriori_gen(consequents); AssociationRule ar; for (int i = 0; i < new_consequents.size(); i++) { Itemset is_consequent = (Itemset) new_consequents.get(i); Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; // if the rule satisfies our requirements we add it // to our collection if (confidence >= min_confidence) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); // otherwise we remove the consequent from the collection // and we update the index such that we don't skip a consequent else new_consequents.remove(i--); } ap_genrules(is_frequent, new_consequents); } }
private boolean isCombinable(Itemset itemseti, Itemset itemsetj) { int i; Item itemi, itemj; Itemset itemset; itemi = itemseti.get(itemseti.size() - 1); itemj = itemsetj.get(itemseti.size() - 1); if (itemi.getVariable() >= itemj.getVariable()) return (false); return (true); }
/** * Function to check if the antecedent of our itemset is equal to another given * * @param a Itemset which antecedents we are going to compare with ours * @return boolean true = they are equal, false = they aren't. */ public boolean isEqualAnt(Itemset a) { int i; Item item; if (this.itemset.size() != a.size()) return (false); for (i = 0; i < this.itemset.size(); i++) { item = this.itemset.get(i); if (!item.isEqual(a.get(i))) return (false); } return (true); }
/** * It adds a rule to the rule base * * @param itemset itemset to be added * @param time Time of the rule */ public void add(Itemset itemset, long time) { int i; Item item; int[] antecedent = new int[n_variables]; for (i = 0; i < n_variables; i++) antecedent[i] = -1; // Don't care for (i = 0; i < itemset.size(); i++) { item = itemset.get(i); antecedent[item.getVariable()] = item.getValue(); } Rule r = new Rule(this.dataBase); r.asignaAntecedente(antecedent); r.setConsequent(itemset.getClas()); r.setConfidence(itemset.getSupportClass() / itemset.getSupport()); r.setSupport(itemset.getSupportClass()); r.setTime(time); this.ruleBase.add(r); }
/** * Find association rules in a database, given the set of frequent itemsets and a set of * restrictions. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @param inAntecedent the items that must appear in the antecedent of each rule, if null then * this constraint is ignored * @param inConsequent the items that must appear in the consequent of each rule, if null then * this constraint is ignored * @param ignored the items that should be ignored, if null then this constraint is ignored * @param maxAntecedent the maximum number of items that can appear in the antecedent of each * rule, if 0 then this constraint is ignored * @param minConsequent the minimum number of items that should appear in the consequent of each * rule, if 0 then this constraint is ignored * @return a Vector containing all association rules found */ public Vector findAssociations( DBCacheReader cacheReader, float minSupport, float minConfidence, Itemset inAntecedent, Itemset inConsequent, Itemset ignored, int maxAntecedent, int minConsequent) { min_support = minSupport; min_confidence = minConfidence; is_in_antecedent = inAntecedent; is_in_consequent = inConsequent; is_ignored = ignored; max_antecedent = maxAntecedent; min_consequent = minConsequent; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); if (frequent.size() == 0) return rules; // if we need to ignore some items if (ignored != null) { // remove all frequent itemsets that contain // items to be ignored; their subsets that do // not contain those items will remain for (int i = 0; i < frequent.size(); i++) { Itemset is = (Itemset) frequent.get(i); if (is.doesIntersect(ignored)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } } if (frequent.size() == 0) return rules; } // if we need to have some items in the antecedent or consequent if (inAntecedent != null || inConsequent != null) { // remove frequent itemsets that don't have the // required items for (int i = 0; i < frequent.size(); i++) { Itemset is = (Itemset) frequent.get(i); if (inAntecedent != null && !inAntecedent.isIncludedIn(is)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } else if (inConsequent != null && !inConsequent.isIncludedIn(is)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } } if (frequent.size() == 0) return rules; } // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset) frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // check whether it also satisfies our constraints boolean approved = true; if (approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false; if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent)) approved = false; if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent) approved = false; if (approved && min_consequent > 0 && is_consequent.size() < min_consequent) approved = false; // if the rule satisifes all requirements then // we add it to the rules collection if (approved) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap-genrules procedure for generating all rules // out of this frequent itemset ap_genrules_constraint(is_frequent, consequents); } return rules; }