/** * Find association rules in a database, given the set of frequent itemsets. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @return a Vector containing all association rules found */ public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) { min_support = minSupport; min_confidence = minConfidence; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset) frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // we add the rule to our collection if it satisfies // our conditions rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap_genrules procedure for generating all rules // out of this frequent itemset ap_genrules(is_frequent, consequents); } return rules; }
/** * It adds a sequence from an array of string that we have to interpret * * @param integers * @param sequenceID */ public void addSequence(String[] integers, int sequenceID) { long timestamp = -1; Sequence sequence = new Sequence(sequences.size()); sequence.setID(sequenceID); Itemset itemset = new Itemset(); int inicio = 0; Map<Item, Boolean> counted = new HashMap<Item, Boolean>(); for (int i = inicio; i < integers.length; i++) { if (integers[i].codePointAt(0) == '<') { // Timestamp String value = integers[i].substring(1, integers[i].length() - 1); timestamp = Long.parseLong(value); itemset.setTimestamp(timestamp); } else if (integers[i].equals("-1")) { // end of an itemset long time = itemset.getTimestamp() + 1; sequence.addItemset(itemset); itemset = new Itemset(); itemset.setTimestamp(time); } else if (integers[i].equals("-2")) { // end of a sequence sequences.add(sequence); } else { // extract the value for an item Item item = itemFactory.getItem(Integer.parseInt(integers[i])); if (counted.get(item) == null) { counted.put(item, Boolean.TRUE); BitSet appearances = frequentItems.get(item); if (appearances == null) { appearances = new BitSet(); frequentItems.put(item, appearances); } appearances.set(sequence.getId()); } itemset.addItem(item); } } }
/**
 * Returns a copy of the given prefix whose last itemset additionally contains the given item.
 *
 * <p>The copy is obtained through {@code prefix.cloneSequence()}; the item is added to the
 * itemset at index {@code size() - 1} of that copy. This method does not itself assign any
 * support value to the returned pattern.
 *
 * <p>NOTE(review): whether the original prefix stays untouched depends on
 * {@code cloneSequence()} copying its itemsets too - confirm against that method.
 *
 * @param prefix the sequence to extend; presumably expected to hold at least one itemset
 * @param item the item to append to the last itemset
 * @return the new sequence
 */
private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, String item) {
    final SequentialPattern extended = prefix.cloneSequence();
    final int lastIndex = extended.size() - 1;
    extended.get(lastIndex).addItem(item);
    return extended;
}
/** * Find association rules in a database, given the set of frequent itemsets and a set of * restrictions. * * @param cacheReader the object used to read from the cache * @param minSupport the minimum support * @param minConfidence the minimum confidence * @param inAntecedent the items that must appear in the antecedent of each rule, if null then * this constraint is ignored * @param inConsequent the items that must appear in the consequent of each rule, if null then * this constraint is ignored * @param ignored the items that should be ignored, if null then this constraint is ignored * @param maxAntecedent the maximum number of items that can appear in the antecedent of each * rule, if 0 then this constraint is ignored * @param minConsequent the minimum number of items that should appear in the consequent of each * rule, if 0 then this constraint is ignored * @return a Vector containing all association rules found */ public Vector findAssociations( DBCacheReader cacheReader, float minSupport, float minConfidence, Itemset inAntecedent, Itemset inConsequent, Itemset ignored, int maxAntecedent, int minConsequent) { min_support = minSupport; min_confidence = minConfidence; is_in_antecedent = inAntecedent; is_in_consequent = inConsequent; is_ignored = ignored; max_antecedent = maxAntecedent; min_consequent = minConsequent; // create the vector where we'll put the rules rules = new Vector(); // read from cache supports of frequent itemsets initializeSupports(cacheReader); // get the frequent itemsets Vector frequent = supports.getItemsets(); if (frequent.size() == 0) return rules; // if we need to ignore some items if (ignored != null) { // remove all frequent itemsets that contain // items to be ignored; their subsets that do // not contain those items will remain for (int i = 0; i < frequent.size(); i++) { Itemset is = (Itemset) frequent.get(i); if (is.doesIntersect(ignored)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, 
frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } } if (frequent.size() == 0) return rules; } // if we need to have some items in the antecedent or consequent if (inAntecedent != null || inConsequent != null) { // remove frequent itemsets that don't have the // required items for (int i = 0; i < frequent.size(); i++) { Itemset is = (Itemset) frequent.get(i); if (inAntecedent != null && !inAntecedent.isIncludedIn(is)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } else if (inConsequent != null && !inConsequent.isIncludedIn(is)) { // replace this element with last, delete last, // and don't advance index frequent.set(i, frequent.lastElement()); frequent.remove(frequent.size() - 1); i--; } } if (frequent.size() == 0) return rules; } // generate rules from each frequent itemset for (int i = 0; i < frequent.size(); i++) { // get a frequent itemset Itemset is_frequent = (Itemset) frequent.get(i); // skip it if it's too small if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) continue; // get all possible 1 item consequents Vector consequents = new Vector(is_frequent.size()); for (int k = 0; k < is_frequent.size(); k++) { int item = is_frequent.getItem(k); Itemset is_consequent = new Itemset(1); is_consequent.addItem(item); // is_consequent now contains a possible consequent // verify now that the rule having this consequent // satisfies our requirements Itemset is_antecedent = is_frequent.subtract(is_consequent); float antecedent_support = (float) 0.00001; try { antecedent_support = supports.getSupport(is_antecedent); } catch (SETException e) { System.err.println("Error geting support from SET!!!\n" + e); } float confidence = is_frequent.getSupport() / antecedent_support; if (confidence >= min_confidence) { consequents.add(is_consequent); // check whether it also satisfies our constraints boolean approved = true; if 
(approved && is_in_antecedent != null && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false; if (approved && is_in_consequent != null && !is_in_consequent.isIncludedIn(is_consequent)) approved = false; if (approved && max_antecedent > 0 && is_antecedent.size() > max_antecedent) approved = false; if (approved && min_consequent > 0 && is_consequent.size() < min_consequent) approved = false; // if the rule satisifes all requirements then // we add it to the rules collection if (approved) rules.add( new AssociationRule( is_antecedent, is_consequent, is_frequent.getSupport(), confidence)); } } // call the ap-genrules procedure for generating all rules // out of this frequent itemset ap_genrules_constraint(is_frequent, consequents); } return rules; }