// this is the ap-genrules procedure that generates rules out // of a frequent itemset. private void ap_genrules(Itemset is_frequent, Vector consequents) { if (consequents.size() == 0) return; // the size of frequent must be bigger than the size of the itemsets // in consequents by at least 2, in order to be able to generate // a rule in this call if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) { Vector new_consequents = apriori_gen(consequents); AssociationRule ar; for (int i = 0; i < new_consequents.size(); i++) { Itemset is_consequent = (Itemset) new_consequents.get(i); Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; // if the rule satisfies our requirements we add it // to our collection if (confidence >= min_confidence) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); // otherwise we remove the consequent from the collection // and we update the index such that we don't skip a consequent else new_consequents.remove(i--); } ap_genrules(is_frequent, new_consequents); } }
// this is the apriori_gen procedure that generates starting from // a k-itemset collection a new collection of (k+1)-itemsets. private Vector apriori_gen(Vector itemsets) { if (itemsets.size() == 0) return new Vector(0); // create a hashtree so that we can check more efficiently the // number of subsets // this may not really be necessary when generating rules since // itemsets will probably be a small collection, but just in case HashTree ht_itemsets = new HashTree(itemsets); for (int i = 0; i < itemsets.size(); i++) ht_itemsets.add(i); ht_itemsets.prepareForDescent(); Vector result = new Vector(); Itemset is_i, is_j; for (int i = 0; i < itemsets.size() - 1; i++) for (int j = i + 1; j < itemsets.size(); j++) { is_i = (Itemset) itemsets.get(i); is_j = (Itemset) itemsets.get(j); // if we cannot combine element i with j then we shouldn't // waste time for bigger j's. This is because we keep the // collections ordered, an important detail in this implementation if (!is_i.canCombineWith(is_j)) break; else { Itemset is = is_i.combineWith(is_j); // a real k-itemset has k (k-1)-subsets // so we test that this holds before adding to result if (ht_itemsets.countSubsets(is) == is.size()) result.add(is); } } return result; }
private void generateRules(ArrayList<Itemset> Lk, int clas) { int i, uncover; Itemset itemset; double confidence[] = new double[2]; double var1[] = new double[2]; double var2[] = new double[2]; for (i = Lk.size() - 1; i >= 0; i--) { itemset = Lk.get(i); var2 = itemset.getSupport(); if (var2[0] > 0.0) { var1 = itemset.getSupportClass(); confidence[0] = var1[0] / var2[0]; confidence[1] = var1[1] / var2[1]; } else { confidence[0] = confidence[1] = 0.0; } if (confidence[0] > 0.4) { this.ruleBaseClase.add(itemset); ruleStage1++; } if (confidence[0] > this.minconf) Lk.remove(i); } if (this.ruleBaseClase.size() > 500000) { this.ruleBaseClase.reduceRules(clas); // System.out.println("Number of rules: " + this.ruleBase.size()); System.gc(); } }
// Generate C(k+1) by join itemset-pairs in F(k) private static List<Itemset> generateCandidates(List<Itemset> frequentItemsets) { if (frequentItemsets.isEmpty() || frequentItemsets.size() == 1) { return new ArrayList<>(); } Collections.sort(frequentItemsets); List<Itemset> candidates = new ArrayList<>(); Itemset candidate; for (int i = 0, j = 1; i != frequentItemsets.size(); ) { while (j != frequentItemsets.size() && Itemset.generateCandidate(frequentItemsets.get(i), frequentItemsets.get(j)) != null) { ++j; } for (int k = i; k != j; ++k) { for (int l = k + 1; l != j; ++l) { Itemset itemset = (Itemset.generateCandidate(frequentItemsets.get(k), frequentItemsets.get(l))); assert itemset != null; candidates.add(itemset); } } i = j; j++; } return candidates; }
/** * Find association rules in a database, given the set of frequent itemsets. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @return a Vector containing all association rules found */ public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) { min_support = minSupport; min_confidence = minConfidence; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset) frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // we add the rule to our collection if it satisfies // our conditions rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap_genrules procedure for generating all rules // out of this frequent itemset ap_genrules(is_frequent, consequents); } return rules; }
// Prune itemsets from C(k+1) that violate downward closure private static List<Itemset> prune(List<Itemset> candidates, List<Itemset> frequentItemsets) { List<Itemset> prunedCandicates = new ArrayList<>(); for (Itemset candidate : candidates) { if (frequentItemsets.containsAll(candidate.downwardClosure())) { prunedCandicates.add(candidate); } } return prunedCandicates; }
/**
 * Recursively extends the itemsets of {@code Lk} by one item.
 *
 * <p>Nothing happens unless {@code Lk} has at least two itemsets and its first itemset is
 * smaller than both {@code nVariables} and {@code depth}. Otherwise, for each itemset i,
 * every later combinable itemset j contributes a clone of i extended with a clone of j's
 * last item; extensions whose class support reaches {@code minsup} are collected, turned
 * into rules, extended recursively, and then cleared before moving on to the next i.
 *
 * @param Lk itemsets of the current level
 * @param clas class index forwarded to {@code generateRules} and to the recursive call
 */
private void generateLarge(ArrayList<Itemset> Lk, int clas) {
  final int size = Lk.size();
  if (size <= 1) {
    return;
  }

  int width = Lk.get(0).size();
  if (width >= this.nVariables || width >= this.depth) {
    return;
  }

  ArrayList<Itemset> extended = new ArrayList<Itemset>();
  for (int first = 0; first < size - 1; first++) {
    Itemset base = Lk.get(first);

    for (int second = first + 1; second < size; second++) {
      Itemset partner = Lk.get(second);
      if (!this.isCombinable(base, partner)) {
        continue;
      }

      Itemset grown = base.clone();
      grown.add((partner.get(partner.size() - 1)).clone());
      grown.calculateSupports(this.dataBase, this.train);
      if (grown.getSupportClass()[0] >= this.minsup) {
        extended.add(grown);
      }
    }

    // Process everything grown from this base before the list is reused for the next one.
    this.generateRules(extended, clas);
    this.generateLarge(extended, clas);
    extended.clear();
    System.gc();
  }
}
/**
 * Tells whether two itemsets can be combined: the variable of the item taken from
 * {@code itemseti} must be strictly smaller than the variable of the item taken from
 * {@code itemsetj}.
 *
 * @param itemseti first itemset; its last item is compared
 * @param itemsetj second itemset; the item at the same position is compared
 * @return {@code true} when the first variable is strictly smaller than the second
 */
private boolean isCombinable(Itemset itemseti, Itemset itemsetj) {
  // NOTE(review): the position is derived from itemseti's size for both itemsets, so this
  // is itemsetj's last item only when both have the same size - confirm callers ensure it.
  int lastPos = itemseti.size() - 1;
  Item tailI = itemseti.get(lastPos);
  Item tailJ = itemsetj.get(lastPos);
  return tailI.getVariable() < tailJ.getVariable();
}
/**
 * Adds the weight of the given itemset to the counters of the given value.
 *
 * <p>Updates the per-class-per-value, per-value and per-class counters and the total.
 *
 * @param valueIndex The index of the value.
 * @param itemset The itemset to add; its class value and weight are read.
 */
public final void add(int valueIndex, Itemset itemset) {
  final int cls = (int) itemset.getClassValue();
  final double w = itemset.getWeight();

  perClassPerValue[valueIndex][cls] += w;
  perValue[valueIndex] += w;
  perClass[cls] += w;
  total += w;
}
// this is the ap-genrules procedure that generates rules out // of a frequent itemset. private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) { if (consequents.size() == 0) return; // the size of frequent must be bigger than the size of the itemsets // in consequents by at least 2, in order to be able to generate // a rule in this call if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) { Vector new_consequents = apriori_gen(consequents); AssociationRule ar; for (int i = 0; i < new_consequents.size(); i++) { Itemset is_consequent = (Itemset) new_consequents.get(i); Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; // if the rule satisfies our confidence requirements if (confidence >= min_confidence) { // check whether it also satisfies our constraints boolean approved = true; if (approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false; if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent)) approved = false; if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent) approved = false; if (approved && min_consequent > 0 && is_consequent.size() < min_consequent) approved = false; // if the rule satisifes all requirements then // we add it to the rules collection if (approved) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } // otherwise we remove the consequent from the collection // and we update the index such that we don't skip a consequent else new_consequents.remove(i--); } ap_genrules_constraint(is_frequent, new_consequents); } }
/** * Function to read an itemset and appends it to the dataset. * * @return True if the itemset was readed succesfully. */ private boolean getItemsetFull() { // fill itemset for (int j = 0; j < IS.getNumInstances(); j++) { double[] itemset = new double[Attributes.getNumAttributes()]; int index; // Get values for all input attributes. for (int i = 0; i < Attributes.getInputNumAttributes(); i++) { // check type and if there is null if (IS.getInstance(j).getInputMissingValues(i)) itemset[i] = Itemset.getMissingValue(); else { if (Attributes.getInputAttribute(i).getType() == 0) // nominal { for (int k = 0; k < Attributes.getAttribute(i).getNumNominalValues(); k++) if (Attributes.getAttribute(i) .getNominalValue(k) .equals(IS.getInstance(j).getInputNominalValues(i))) itemset[i] = (double) k; } else // real and integer { itemset[i] = IS.getInstance(j).getInputRealValues(i); } } // else } // for // Get values for output attribute. int i = Attributes.getInputNumAttributes(); // check type and if there is null if (IS.getInstance(j).getOutputMissingValues(0)) itemset[i] = Itemset.getMissingValue(); else { if (Attributes.getOutputAttribute(0).getType() == 0) // nominal { for (int k = 0; k < Attributes.getOutputAttribute(0).getNumNominalValues(); k++) if (Attributes.getOutputAttribute(0) .getNominalValue(k) .equals(IS.getInstance(j).getOutputNominalValues(0))) itemset[i] = (double) k; } else // real and integer { itemset[i] = IS.getInstance(j).getOutputRealValues(0); } } // else // Add itemset to dataset addItemset(new Itemset(1, itemset)); } // for return true; }
/**
 * Checks whether the antecedent of this itemset equals that of another itemset.
 *
 * <p>The two are equal when they have the same number of items and the items at each
 * position are equal according to {@code Item.isEqual}.
 *
 * @param a Itemset whose antecedent is compared with ours
 * @return boolean true = they are equal, false = they aren't.
 */
public boolean isEqualAnt(Itemset a) {
  final int length = this.itemset.size();
  if (length != a.size()) {
    return false;
  }

  for (int pos = 0; pos < length; pos++) {
    Item mine = this.itemset.get(pos);
    if (!mine.isEqual(a.get(pos))) {
      return false;
    }
  }
  return true;
}
/**
 * Adds the given itemset to all values, weighting it according to the given weights.
 *
 * <p>For every value the contribution is the itemset's weight multiplied by that value's
 * entry in {@code weights}; it is added to all four counters.
 *
 * @param itemset The itemset to add.
 * @param weights The weights of the itemset for every value.
 */
public final void addWeights(Itemset itemset, double[] weights) {
  final int cls = (int) itemset.getClassValue();

  for (int value = 0; value < perValue.length; value++) {
    final double share = itemset.getWeight() * weights[value];
    perClassPerValue[value][cls] += share;
    perValue[value] += share;
    perClass[cls] += share;
    total += share;
  }
}
/**
 * Shifts all itemsets in the given range from one value to another.
 *
 * <p>The weight of each itemset is subtracted from the counters of {@code from} and added
 * to the counters of {@code to}. Per-class counters and the total are left untouched.
 *
 * @param from The index of the value the weights are taken from.
 * @param to The index of the value the weights are moved to.
 * @param source The dataset.
 * @param start The index of the first itemset to shift.
 * @param end The index of the first itemset that will not be shifted.
 */
public final void shiftRange(int from, int to, Dataset source, int start, int end) {
  for (int pos = start; pos < end; pos++) {
    Itemset moved = (Itemset) source.itemset(pos);
    final int cls = (int) moved.getClassValue();
    final double w = moved.getWeight();

    perClassPerValue[from][cls] -= w;
    perClassPerValue[to][cls] += w;
    perValue[from] -= w;
    perValue[to] += w;
  }
}
/**
 * Adds all itemsets in the given range to the given value.
 *
 * <p>Per-class counters are updated item by item; the per-value counter and the total
 * receive the accumulated weight of the whole range once the loop has finished.
 *
 * @param valueIndex The index of the value.
 * @param source The dataset the itemsets are taken from.
 * @param start The index of the first itemset to add.
 * @param end The index of the first itemset that will not be added.
 */
public final void addRange(int valueIndex, Dataset source, int start, int end) {
  double rangeWeight = 0;

  for (int pos = start; pos < end; pos++) {
    Itemset added = (Itemset) source.itemset(pos);
    final int cls = (int) added.getClassValue();

    rangeWeight = rangeWeight + added.getWeight();
    perClassPerValue[valueIndex][cls] += added.getWeight();
    perClass[cls] += added.getWeight();
  }

  perValue[valueIndex] += rangeWeight;
  total += rangeWeight;
}
/**
 * Clone function.
 *
 * <p>Builds a new itemset for the same class, fills it with clones of the items held here
 * and copies the class, support, rule support, {@code per}, hit and miss fields.
 *
 * @return the copy of this itemset
 */
public Itemset clone() {
  Itemset copy = new Itemset(this.clas);

  final int count = this.itemset.size();
  for (int pos = 0; pos < count; pos++) {
    copy.add((itemset.get(pos)).clone());
  }

  copy.clas = this.clas;
  copy.support = this.support;
  copy.supportRule = this.supportRule;
  copy.per = this.per;
  copy.hits = this.hits;
  copy.misses = this.misses;
  return copy;
}
/**
 * Counts the training examples of the given class that no itemset of {@code L2} covers.
 *
 * <p>An example counts as covered as soon as one itemset returns a degree whose first
 * component is greater than 0.
 *
 * @param clas class whose examples are examined
 * @return number of examples of that class left uncovered
 */
public int hasUncoverClass(int clas) {
  int uncovered = 0;

  for (int example = 0; example < train.size(); example++) {
    if (this.train.getOutputAsInteger(example) != clas) {
      continue;
    }

    boolean covered = false;
    for (int idx = 0; idx < L2.size(); idx++) {
      Itemset candidate = L2.get(idx);
      double[] degree = candidate.degree(this.dataBase, this.train.getExample(example));
      if (degree[0] > 0.0) {
        covered = true;
        break;
      }
    }

    if (!covered) {
      uncovered++;
    }
  }

  return uncovered;
}
// this method stores all frequent itemsets that have support // greater than the minimum support in a SET for more efficient // access times. private void initializeSupports(DBCacheReader cacheReader) { // create new SET supports = new SET(); try { Itemset is; while (true) { // get item from cache is = cacheReader.getNextItemset(); // if item has support greater than the minimum support // required then we add it to the SET if (is.getSupport() >= min_support) { supports.insert(is); } } } catch (EOFException e) { // do nothing, we just reached the EOF } catch (IOException e) { System.err.println("Error scanning cache!!!\n" + e); } catch (ClassNotFoundException e) { System.err.println("Error scanning cache!!!\n" + e); } }
/** * It adds a sequence from an array of string that we have to interpret * * @param integers * @param sequenceID */ public void addSequence(String[] integers, int sequenceID) { long timestamp = -1; Sequence sequence = new Sequence(sequences.size()); sequence.setID(sequenceID); Itemset itemset = new Itemset(); int inicio = 0; Map<Item, Boolean> counted = new HashMap<Item, Boolean>(); for (int i = inicio; i < integers.length; i++) { if (integers[i].codePointAt(0) == '<') { // Timestamp String value = integers[i].substring(1, integers[i].length() - 1); timestamp = Long.parseLong(value); itemset.setTimestamp(timestamp); } else if (integers[i].equals("-1")) { // end of an itemset long time = itemset.getTimestamp() + 1; sequence.addItemset(itemset); itemset = new Itemset(); itemset.setTimestamp(time); } else if (integers[i].equals("-2")) { // end of a sequence sequences.add(sequence); } else { // extract the value for an item Item item = itemFactory.getItem(Integer.parseInt(integers[i])); if (counted.get(item) == null) { counted.put(item, Boolean.TRUE); BitSet appearances = frequentItems.get(item); if (appearances == null) { appearances = new BitSet(); frequentItems.put(item, appearances); } appearances.set(sequence.getId()); } itemset.addItem(item); } } }
/** * Function to add all itemsets with unknown values for given attribute. * * @param source The dataset that contains all the itemsets. * @param attIndex The index of the attribute with possible unknown values. * @throws Exception */ public final void addWithUnknownValue(Dataset source, int attIndex) { double[] probs; double weight, newWeight; int classIndex; Itemset itemset; int j; probs = new double[perValue.length]; for (j = 0; j < perValue.length; j++) { // if ( Comparators.isEqual( total, 0 ) ) if (total == 0) { probs[j] = 1.0 / probs.length; } else { probs[j] = perValue[j] / total; } } Enumeration enum2 = source.enumerateItemsets(); while (enum2.hasMoreElements()) { itemset = (Itemset) enum2.nextElement(); if (itemset.isMissing(attIndex)) { classIndex = (int) itemset.getClassValue(); weight = itemset.getWeight(); perClass[classIndex] = perClass[classIndex] + weight; total = total + weight; for (j = 0; j < perValue.length; j++) { newWeight = probs[j] * weight; perClassPerValue[j][classIndex] = perClassPerValue[j][classIndex] + newWeight; perValue[j] = perValue[j] + newWeight; } } } }
/**
 * Checks whether this itemset is a sub-itemset of (can be contained in) a given itemset.
 *
 * <p>Both must have the same class. Each of our items is searched in {@code a} from the
 * start; the search gives up as soon as it meets a different item whose variable is not
 * smaller than the variable of the item being searched.
 *
 * @param a Itemset to check if it can contain ours
 * @return boolean true = our itemset is subitemset of a, false = it isn't.
 */
public boolean isSubItemset(Itemset a) {
  if (this.clas != a.getClas()) {
    return false;
  }

  for (int mine = 0; mine < this.itemset.size(); mine++) {
    Item wanted = this.itemset.get(mine);
    boolean found = false;

    for (int theirs = 0; theirs < a.itemset.size(); theirs++) {
      Item probe = a.itemset.get(theirs);
      if (wanted.isEqual(probe)) {
        found = true;
        break;
      }
      if (probe.getVariable() >= wanted.getVariable()) {
        return false;
      }
    }

    if (!found) {
      return false;
    }
  }
  return true;
}
/** * It adds a rule to the rule base * * @param itemset itemset to be added * @param time Time of the rule */ public void add(Itemset itemset, long time) { int i; Item item; int[] antecedent = new int[n_variables]; for (i = 0; i < n_variables; i++) antecedent[i] = -1; // Don't care for (i = 0; i < itemset.size(); i++) { item = itemset.get(i); antecedent[item.getVariable()] = item.getValue(); } Rule r = new Rule(this.dataBase); r.asignaAntecedente(antecedent); r.setConsequent(itemset.getClas()); r.setConfidence(itemset.getSupportClass() / itemset.getSupport()); r.setSupport(itemset.getSupportClass()); r.setTime(time); this.ruleBase.add(r); }
/**
 * Rebuilds {@code L2} with the single-item itemsets of the given class whose class support
 * reaches {@code minsup}, then generates rules from them.
 *
 * <p>Variables with at most one label are skipped. One scratch itemset is reused: each
 * (variable, label) item is added, evaluated, cloned into {@code L2} when it qualifies,
 * and removed again.
 *
 * @param clas class the itemsets are built for
 */
private void generateL2(int clas) {
  this.L2.clear();
  Itemset scratch = new Itemset(clas);

  for (int variable = 0; variable < this.nVariables; variable++) {
    if (this.dataBase.numLabels(variable) <= 1) {
      continue;
    }

    for (int label = 0; label < this.dataBase.numLabels(variable); label++) {
      scratch.add(new Item(variable, label));
      scratch.calculateSupports(this.dataBase, this.train);
      if (scratch.getSupportClass()[0] >= this.minsup) {
        this.L2.add(scratch.clone());
      }
      // Empty the scratch itemset again for the next item.
      scratch.remove(0);
    }
  }

  this.generateRules(this.L2, clas);
}
/**
 * Adds one itemset to the dataset.
 *
 * <p>The dataset stores a copy of the argument, bound to this dataset, not the argument
 * itself.
 *
 * @param itemset The itemset to add to the dataset.
 */
public final void addItemset(Itemset itemset) {
  Itemset owned = (Itemset) itemset.copy();
  owned.setDataset(this);
  itemsets.addElement(owned);
}
/** * Find association rules in a database, given the set of frequent itemsets and a set of * restrictions. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @param inAntecedent the items that must appear in the antecedent of each rule, if null then * this constraint is ignored * @param inConsequent the items that must appear in the consequent of each rule, if null then * this constraint is ignored * @param ignored the items that should be ignored, if null then this constraint is ignored * @param maxAntecedent the maximum number of items that can appear in the antecedent of each * rule, if 0 then this constraint is ignored * @param minConsequent the minimum number of items that should appear in the consequent of each * rule, if 0 then this constraint is ignored * @return a Vector containing all association rules found */ public Vector findAssociations( DBCacheReader cacheReader, float minSupport, float minConfidence, Itemset inAntecedent, Itemset inConsequent, Itemset ignored, int maxAntecedent, int minConsequent) { min_support = minSupport; min_confidence = minConfidence; is_in_antecedent = inAntecedent; is_in_consequent = inConsequent; is_ignored = ignored; max_antecedent = maxAntecedent; min_consequent = minConsequent; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); if (frequent.size() == 0) return rules; // if we need to ignore some items if (ignored != null) { // remove all frequent itemsets that contain // items to be ignored; their subsets that do // not contain those items will remain for (int i = 0; i < frequent.size(); i++) { Itemset is = (Itemset) frequent.get(i); if (is.doesIntersect(ignored)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, 
frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } } if (frequent.size() == 0) return rules; } // if we need to have some items in the antecedent or consequent if (inAntecedent != null || inConsequent != null) { // remove frequent itemsets that don't have the // required items for (int i = 0; i < frequent.size(); i++) { Itemset is = (Itemset) frequent.get(i); if (inAntecedent != null && !inAntecedent.isIncludedIn(is)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } else if (inConsequent != null && !inConsequent.isIncludedIn(is)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } } if (frequent.size() == 0) return rules; } // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset) frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // check whether it also satisfies our constraints boolean approved = true; if 
(approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false; if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent)) approved = false; if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent) approved = false; if (approved && min_consequent > 0 && is_consequent.size() < min_consequent) approved = false; // if the rule satisifes all requirements then // we add it to the rules collection if (approved) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap-genrules procedure for generating all rules // out of this frequent itemset ap_genrules_constraint(is_frequent, consequents); } return rules; }