public void addItemset(Itemset itemset) {
    while (levels.size() <= itemset.getItems().size()) {
        levels.add(new ArrayList<Itemset>());
    }
    levels.get(itemset.getItems().size()).add(itemset);
    itemsetsCount++;
}
// Generate C(k+1) by joining itemset pairs in F(k).
private static List<Itemset> generateCandidates(List<Itemset> frequentItemsets) {
    if (frequentItemsets.isEmpty() || frequentItemsets.size() == 1) {
        return new ArrayList<>();
    }
    Collections.sort(frequentItemsets);
    List<Itemset> candidates = new ArrayList<>();
    for (int i = 0, j = 1; i != frequentItemsets.size(); ) {
        // Advance j to the end of the block of itemsets joinable with itemset i,
        // i.e. those sharing the same (k-1)-prefix.
        while (j != frequentItemsets.size()
            && Itemset.generateCandidate(frequentItemsets.get(i), frequentItemsets.get(j)) != null) {
            ++j;
        }
        // Join every pair inside the block [i, j).
        for (int k = i; k != j; ++k) {
            for (int l = k + 1; l != j; ++l) {
                Itemset itemset = Itemset.generateCandidate(frequentItemsets.get(k), frequentItemsets.get(l));
                assert itemset != null;
                candidates.add(itemset);
            }
        }
        i = j;
        j++;
    }
    return candidates;
}
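// Standalone sketch of the join step above. For illustration only: it uses plain
// sorted integer lists rather than the Itemset class, whose API is not shown here.
// Two sorted k-itemsets join into a (k+1)-candidate iff they share their first
// k-1 items; e.g. F(2) = {1,2}, {1,3}, {2,3} yields the single candidate {1,2,3}.
import java.util.*;

class JoinSketch {
    static List<Integer> join(List<Integer> a, List<Integer> b) {
        int k = a.size();
        if (!a.subList(0, k - 1).equals(b.subList(0, k - 1))) return null; // prefixes differ
        List<Integer> c = new ArrayList<>(a);
        c.add(b.get(k - 1));
        return c;
    }

    public static void main(String[] args) {
        System.out.println(join(List.of(1, 2), List.of(1, 3))); // [1, 2, 3]
        System.out.println(join(List.of(1, 3), List.of(2, 3))); // null
    }
}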
private void generateRules(ArrayList<Itemset> Lk, int clas) {
    int i;
    Itemset itemset;
    double[] confidence = new double[2];
    double[] supportClass;
    double[] support;

    for (i = Lk.size() - 1; i >= 0; i--) {
        itemset = Lk.get(i);
        support = itemset.getSupport();
        if (support[0] > 0.0) {
            supportClass = itemset.getSupportClass();
            confidence[0] = supportClass[0] / support[0];
            confidence[1] = supportClass[1] / support[1];
        } else {
            confidence[0] = confidence[1] = 0.0;
        }
        if (confidence[0] > 0.4) {
            this.ruleBaseClase.add(itemset);
            ruleStage1++;
        }
        // Itemsets already above the confidence threshold are not extended further.
        if (confidence[0] > this.minconf) Lk.remove(i);
    }
    if (this.ruleBaseClase.size() > 500000) {
        this.ruleBaseClase.reduceRules(clas);
        System.gc();
    }
}
/**
 * Function to add all itemsets with unknown values for the given attribute.
 *
 * @param source The dataset that contains all the itemsets.
 * @param attIndex The index of the attribute with possible unknown values.
 */
public final void addWithUnknownValue(MyDataset source, int attIndex) {
    double[] probs;
    double weight, newWeight;
    int classIndex;
    Itemset itemset;
    int j;

    // Estimate the probability of each value from the known-value counts.
    probs = new double[perValue.length];
    for (j = 0; j < perValue.length; j++) {
        if (total == 0) probs[j] = 1.0 / probs.length;
        else probs[j] = perValue[j] / total;
    }

    Enumeration enum2 = source.enumerateItemsets();
    while (enum2.hasMoreElements()) {
        itemset = (Itemset) enum2.nextElement();
        if (itemset.isMissing(attIndex)) {
            classIndex = (int) itemset.getClassValue();
            weight = itemset.getWeight();
            perClass[classIndex] = perClass[classIndex] + weight;
            total = total + weight;

            // Distribute the itemset's weight across all values in proportion to probs.
            for (j = 0; j < perValue.length; j++) {
                newWeight = probs[j] * weight;
                perClassPerValue[j][classIndex] = perClassPerValue[j][classIndex] + newWeight;
                perValue[j] = perValue[j] + newWeight;
            }
        }
    }
}
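// Worked example of the proportional split above (numbers are illustrative only):
// with perValue = {3.0, 1.0} and total = 4.0, probs = {0.75, 0.25}; an itemset of
// weight 1.0 whose value for attIndex is missing then adds 0.75 to perValue[0] and
// 0.25 to perValue[1], and the same amounts to the matching perClassPerValue cells.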
/**
 * This method saves a sequential pattern to the output file or in memory, depending on
 * whether the user provided an output file path when launching the algorithm.
 *
 * @param prefix the pattern to be saved.
 * @throws IOException if an error occurs while writing the output file.
 */
private void savePattern(SequentialPattern prefix) throws IOException {
    // increase the number of patterns found, for statistics purposes
    patternCount++;

    // if the result should be saved to a file
    if (writer != null) {
        StringBuilder r = new StringBuilder();
        for (Itemset itemset : prefix.getItemsets()) {
            for (String item : itemset.getItems()) {
                r.append(item);
                r.append(' ');
            }
            r.append("-1 "); // itemset separator
        }
        r.append(" #SUP: ");
        r.append(prefix.getSequencesID().size());
        writer.write(r.toString());
        writer.newLine();
    }
    // otherwise the result is kept in memory
    else {
        patterns.addSequence(prefix, prefix.size());
    }
}
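// For illustration (format read off the code above): a pattern with itemsets
// {a}{b c} supported by 4 sequences is written as the line
//   a -1 b c -1  #SUP: 4
// where "-1" closes each itemset and "#SUP:" introduces the support count.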
// This is the apriori_gen procedure that generates, starting from a k-itemset
// collection, a new collection of (k+1)-itemsets.
private Vector apriori_gen(Vector itemsets) {
    if (itemsets.size() == 0) return new Vector(0);

    // Create a hash tree so that we can count subsets more efficiently.
    // This may not really be necessary when generating rules, since itemsets
    // will probably be a small collection, but just in case.
    HashTree ht_itemsets = new HashTree(itemsets);
    for (int i = 0; i < itemsets.size(); i++) ht_itemsets.add(i);
    ht_itemsets.prepareForDescent();

    Vector result = new Vector();
    Itemset is_i, is_j;
    for (int i = 0; i < itemsets.size() - 1; i++)
        for (int j = i + 1; j < itemsets.size(); j++) {
            is_i = (Itemset) itemsets.get(i);
            is_j = (Itemset) itemsets.get(j);

            // If we cannot combine element i with j then we shouldn't waste time
            // on bigger j's. This works because we keep the collections ordered,
            // an important detail in this implementation.
            if (!is_i.canCombineWith(is_j)) break;
            else {
                Itemset is = is_i.combineWith(is_j);

                // A real k-itemset has k (k-1)-subsets, so we test that this
                // holds before adding the candidate to the result.
                if (ht_itemsets.countSubsets(is) == is.size()) result.add(is);
            }
        }
    return result;
}
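// Numeric check of the subset test above: a 3-itemset {1,2,3} has exactly three
// 2-subsets, {1,2}, {1,3} and {2,3}. If any of them is absent from the input
// collection, countSubsets returns less than 3 and the candidate is discarded,
// which is exactly the downward-closure property of support.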
// This is the ap-genrules procedure that generates rules out of a frequent itemset.
private void ap_genrules(Itemset is_frequent, Vector consequents) {
    if (consequents.size() == 0) return;

    // The size of the frequent itemset must exceed the size of the itemsets in
    // consequents by at least 2 for this call to be able to generate a rule.
    if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) {
        Vector new_consequents = apriori_gen(consequents);
        for (int i = 0; i < new_consequents.size(); i++) {
            Itemset is_consequent = (Itemset) new_consequents.get(i);
            Itemset is_antecedent = is_frequent.subtract(is_consequent);

            float antecedent_support = (float) 0.00001;
            try {
                antecedent_support = supports.getSupport(is_antecedent);
            } catch (SETException e) {
                System.err.println("Error getting support from SET!!!\n" + e);
            }
            float confidence = is_frequent.getSupport() / antecedent_support;

            // If the rule satisfies our requirements we add it to our collection.
            if (confidence >= min_confidence)
                rules.add(new AssociationRule(
                    is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
            // Otherwise we remove the consequent from the collection and update
            // the index so that we don't skip a consequent.
            else new_consequents.remove(i--);
        }
        ap_genrules(is_frequent, new_consequents);
    }
}
/**
 * Find association rules in a database, given the set of frequent itemsets.
 *
 * @param cacheReader the object used to read from the cache
 * @param minSupport the minimum support
 * @param minConfidence the minimum confidence
 * @return a Vector containing all association rules found
 */
public Vector findAssociations(DBCacheReader cacheReader, float minSupport, float minConfidence) {
    min_support = minSupport;
    min_confidence = minConfidence;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read the supports of the frequent itemsets from the cache
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
        // get a frequent itemset
        Itemset is_frequent = (Itemset) frequent.get(i);

        // skip it if it's too small
        if (is_frequent.size() <= 1) continue;

        // get all possible 1-item consequents
        Vector consequents = new Vector(is_frequent.size());
        for (int k = 0; k < is_frequent.size(); k++) {
            int item = is_frequent.getItem(k);
            Itemset is_consequent = new Itemset(1);
            is_consequent.addItem(item);

            // is_consequent now contains a possible consequent; verify that the
            // rule having this consequent satisfies our requirements
            Itemset is_antecedent = is_frequent.subtract(is_consequent);
            float antecedent_support = (float) 0.00001;
            try {
                antecedent_support = supports.getSupport(is_antecedent);
            } catch (SETException e) {
                System.err.println("Error getting support from SET!!!\n" + e);
            }
            float confidence = is_frequent.getSupport() / antecedent_support;

            if (confidence >= min_confidence) {
                consequents.add(is_consequent);

                // we add the rule to our collection if it satisfies our conditions
                rules.add(new AssociationRule(
                    is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
            }
        }

        // call the ap_genrules procedure to generate all rules out of this
        // frequent itemset
        ap_genrules(is_frequent, consequents);
    }
    return rules;
}
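// Minimal standalone sketch of the confidence test used above and in ap_genrules:
// conf(A => B) = supp(A u B) / supp(A). It uses plain java.util collections rather
// than the Vector/SET/Itemset classes of this codebase (an assumption for illustration).
import java.util.*;

class ConfidenceSketch {
    public static void main(String[] args) {
        Map<Set<Integer>, Float> supports = new HashMap<>();
        supports.put(Set.of(1), 0.60f);     // supp({1})
        supports.put(Set.of(1, 2), 0.45f);  // supp({1,2})

        // rule {1} => {2}: antecedent {1}, union {1,2}
        float confidence = supports.get(Set.of(1, 2)) / supports.get(Set.of(1));
        System.out.println(confidence); // ~0.75; the rule is kept iff confidence >= min_confidence
    }
}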
// Prune itemsets from C(k+1) that violate downward closure.
private static List<Itemset> prune(List<Itemset> candidates, List<Itemset> frequentItemsets) {
    List<Itemset> prunedCandidates = new ArrayList<>();
    for (Itemset candidate : candidates) {
        // Keep the candidate only if every subset one item smaller is frequent.
        if (frequentItemsets.containsAll(candidate.downwardClosure())) {
            prunedCandidates.add(candidate);
        }
    }
    return prunedCandidates;
}
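// Standalone sketch of what downwardClosure() plausibly returns, under the
// assumption (the Itemset API is not shown) that it yields all subsets one item
// smaller: for {1,2,3} these are {2,3}, {1,3} and {1,2}; the candidate survives
// pruning only if all of them are frequent.
import java.util.*;

class DownwardClosureSketch {
    static List<Set<Integer>> downwardClosure(Set<Integer> candidate) {
        List<Set<Integer>> subsets = new ArrayList<>();
        for (Integer item : candidate) {
            Set<Integer> subset = new TreeSet<>(candidate);
            subset.remove(item); // drop one item to get a k-subset
            subsets.add(subset);
        }
        return subsets;
    }

    public static void main(String[] args) {
        System.out.println(downwardClosure(new TreeSet<>(List.of(1, 2, 3))));
        // [[2, 3], [1, 3], [1, 2]]
    }
}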
private void generateLarge(ArrayList<Itemset> Lk, int clas) {
    int i, j, size;
    ArrayList<Itemset> Lnew;
    Itemset newItemset, itemseti, itemsetj;

    size = Lk.size();
    if (size > 1) {
        if (((Lk.get(0)).size() < this.nVariables) && ((Lk.get(0)).size() < this.depth)) {
            Lnew = new ArrayList<Itemset>();
            for (i = 0; i < size - 1; i++) {
                itemseti = Lk.get(i);
                for (j = i + 1; j < size; j++) {
                    itemsetj = Lk.get(j);
                    if (this.isCombinable(itemseti, itemsetj)) {
                        // Extend itemseti with the last item of itemsetj.
                        newItemset = itemseti.clone();
                        newItemset.add((itemsetj.get(itemsetj.size() - 1)).clone());
                        newItemset.calculateSupports(this.dataBase, this.train);
                        if (newItemset.getSupportClass()[0] >= this.minsup) Lnew.add(newItemset);
                    }
                }
                this.generateRules(Lnew, clas);
                this.generateLarge(Lnew, clas);
                Lnew.clear();
                System.gc();
            }
        }
    }
}
private boolean isCombinable(Itemset itemseti, Itemset itemsetj) {
    Item itemi, itemj;

    // Both itemsets have the same size here; compare their last items. The
    // original indexed itemsetj with itemseti's size, which is equivalent only
    // because the sizes match.
    itemi = itemseti.get(itemseti.size() - 1);
    itemj = itemsetj.get(itemsetj.size() - 1);
    return itemi.getVariable() < itemj.getVariable();
}
// This is the ap-genrules procedure that generates rules out of a frequent itemset,
// subject to the user-supplied constraints.
private void ap_genrules_constraint(Itemset is_frequent, Vector consequents) {
    if (consequents.size() == 0) return;

    // The size of the frequent itemset must exceed the size of the itemsets in
    // consequents by at least 2 for this call to be able to generate a rule.
    if (is_frequent.size() > ((Itemset) (consequents.get(0))).size() + 1) {
        Vector new_consequents = apriori_gen(consequents);
        for (int i = 0; i < new_consequents.size(); i++) {
            Itemset is_consequent = (Itemset) new_consequents.get(i);
            Itemset is_antecedent = is_frequent.subtract(is_consequent);

            float antecedent_support = (float) 0.00001;
            try {
                antecedent_support = supports.getSupport(is_antecedent);
            } catch (SETException e) {
                System.err.println("Error getting support from SET!!!\n" + e);
            }
            float confidence = is_frequent.getSupport() / antecedent_support;

            // if the rule satisfies our confidence requirements
            if (confidence >= min_confidence) {
                // check whether it also satisfies our constraints
                boolean approved = true;
                if (approved && is_in_antecedent != null
                    && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false;
                if (approved && is_in_consequent != null
                    && !is_in_consequent.isIncludedIn(is_consequent)) approved = false;
                if (approved && max_antecedent > 0
                    && is_antecedent.size() > max_antecedent) approved = false;
                if (approved && min_consequent > 0
                    && is_consequent.size() < min_consequent) approved = false;

                // if the rule satisfies all requirements then we add it to the
                // rules collection
                if (approved)
                    rules.add(new AssociationRule(
                        is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
            }
            // otherwise we remove the consequent from the collection and update
            // the index so that we don't skip a consequent
            else new_consequents.remove(i--);
        }
        ap_genrules_constraint(is_frequent, new_consequents);
    }
}
/**
 * Function to add the given itemset to the given value.
 *
 * @param valueIndex The index of the value.
 * @param itemset The itemset to add.
 */
public final void add(int valueIndex, Itemset itemset) {
    int classIndex;
    double weight;

    classIndex = (int) itemset.getClassValue();
    weight = itemset.getWeight();
    perClassPerValue[valueIndex][classIndex] = perClassPerValue[valueIndex][classIndex] + weight;
    perValue[valueIndex] = perValue[valueIndex] + weight;
    perClass[classIndex] = perClass[classIndex] + weight;
    total = total + weight;
}
/**
 * Function to read the itemsets and append them to the dataset.
 *
 * @return True if the itemsets were read successfully.
 */
private boolean getItemsetFull() {
    // fill the itemsets
    for (int j = 0; j < IS.getNumInstances(); j++) {
        double[] itemset = new double[Attributes.getNumAttributes()];

        // Get values for all input attributes.
        for (int i = 0; i < Attributes.getInputNumAttributes(); i++) {
            // check the type and whether the value is missing
            if (IS.getInstance(j).getInputMissingValues(i)) itemset[i] = Itemset.getMissingValue();
            else {
                if (Attributes.getInputAttribute(i).getType() == 0) { // nominal
                    for (int k = 0; k < Attributes.getInputAttribute(i).getNumNominalValues(); k++)
                        if (Attributes.getInputAttribute(i)
                            .getNominalValue(k)
                            .equals(IS.getInstance(j).getInputNominalValues(i)))
                            itemset[i] = (double) k;
                } else { // real and integer
                    itemset[i] = IS.getInstance(j).getInputRealValues(i);
                }
            }
        }

        // Get the value for the output attribute.
        int i = Attributes.getInputNumAttributes();

        // check the type and whether the value is missing
        if (IS.getInstance(j).getOutputMissingValues(0)) itemset[i] = Itemset.getMissingValue();
        else {
            if (Attributes.getOutputAttribute(0).getType() == 0) { // nominal
                for (int k = 0; k < Attributes.getOutputAttribute(0).getNumNominalValues(); k++)
                    if (Attributes.getOutputAttribute(0)
                        .getNominalValue(k)
                        .equals(IS.getInstance(j).getOutputNominalValues(0)))
                        itemset[i] = (double) k;
            } else { // real and integer
                itemset[i] = IS.getInstance(j).getOutputRealValues(0);
            }
        }

        // Add the itemset to the dataset.
        addItemset(new Itemset(1, itemset));
    }
    return true;
}
/**
 * Function to add the given itemset to all values, weighting it according to the given weights.
 *
 * @param itemset The itemset to add.
 * @param weights The weights of the itemset for every value.
 */
public final void addWeights(Itemset itemset, double[] weights) {
    int classIndex;
    int i;

    classIndex = (int) itemset.getClassValue();
    for (i = 0; i < perValue.length; i++) {
        double weight = itemset.getWeight() * weights[i];
        perClassPerValue[i][classIndex] = perClassPerValue[i][classIndex] + weight;
        perValue[i] = perValue[i] + weight;
        perClass[classIndex] = perClass[classIndex] + weight;
        total = total + weight;
    }
}
/**
 * Function to shift all itemsets in the given range from one value to another.
 *
 * @param from The value to shift the itemsets from.
 * @param to The value to shift the itemsets to.
 * @param source The dataset.
 * @param start The index of the first itemset to shift.
 * @param end The index of the first itemset that will not be shifted.
 */
public final void shiftRange(int from, int to, MyDataset source, int start, int end) {
    int classIndex;
    double weight;
    Itemset itemset;
    int i;

    for (i = start; i < end; i++) {
        itemset = (Itemset) source.itemset(i);
        classIndex = (int) itemset.getClassValue();
        weight = itemset.getWeight();
        perClassPerValue[from][classIndex] -= weight;
        perClassPerValue[to][classIndex] += weight;
        perValue[from] -= weight;
        perValue[to] += weight;
    }
}
/**
 * Function to add all itemsets in the given range to the given value.
 *
 * @param valueIndex The index of the value.
 * @param source The source of the data.
 * @param start The index of the first itemset to add.
 * @param end The index of the first itemset that will not be added.
 */
public final void addRange(int valueIndex, MyDataset source, int start, int end) {
    double sumOfWeights = 0;
    int classIndex;
    Itemset itemset;
    int i;

    for (i = start; i < end; i++) {
        itemset = (Itemset) source.itemset(i);
        classIndex = (int) itemset.getClassValue();
        sumOfWeights = sumOfWeights + itemset.getWeight();
        perClassPerValue[valueIndex][classIndex] += itemset.getWeight();
        perClass[classIndex] += itemset.getWeight();
    }
    perValue[valueIndex] += sumOfWeights;
    total += sumOfWeights;
}
public int hasUncoverClass(int clas) {
    int uncover;
    double[] degree;
    Itemset itemset;
    boolean stop;

    // Count the training examples of the given class not covered by any itemset in L2.
    uncover = 0;
    for (int j = 0; j < train.size(); j++) {
        if (this.train.getOutputAsInteger(j) == clas) {
            stop = false;
            for (int i = 0; i < L2.size() && !stop; i++) {
                itemset = L2.get(i);
                degree = itemset.degree(this.dataBase, this.train.getExample(j));
                if (degree[0] > 0.0) stop = true;
            }
            if (!stop) uncover++;
        }
    }
    return uncover;
}
// This method stores all frequent itemsets whose support is at least the minimum
// support in a SET, for more efficient access times.
private void initializeSupports(DBCacheReader cacheReader) {
    // create a new SET
    supports = new SET();
    try {
        Itemset is;
        while (true) {
            // get the next itemset from the cache
            is = cacheReader.getNextItemset();

            // if the itemset meets the minimum support requirement then we add
            // it to the SET
            if (is.getSupport() >= min_support) {
                supports.insert(is);
            }
        }
    } catch (EOFException e) {
        // do nothing, we just reached the EOF
    } catch (IOException e) {
        System.err.println("Error scanning cache!!!\n" + e);
    } catch (ClassNotFoundException e) {
        System.err.println("Error scanning cache!!!\n" + e);
    }
}
/**
 * It adds a sequence from an array of strings that we have to interpret.
 *
 * @param integers the tokens of the sequence: items, "<t>" timestamps, "-1" itemset
 *     separators and "-2" as the end-of-sequence marker
 * @param sequenceID the identifier of the sequence
 */
public void addSequence(String[] integers, int sequenceID) {
    long timestamp = -1;
    Sequence sequence = new Sequence(sequences.size());
    sequence.setID(sequenceID);
    Itemset itemset = new Itemset();
    Map<Item, Boolean> counted = new HashMap<Item, Boolean>();

    for (int i = 0; i < integers.length; i++) {
        if (integers[i].codePointAt(0) == '<') { // timestamp
            String value = integers[i].substring(1, integers[i].length() - 1);
            timestamp = Long.parseLong(value);
            itemset.setTimestamp(timestamp);
        } else if (integers[i].equals("-1")) { // end of an itemset
            long time = itemset.getTimestamp() + 1;
            sequence.addItemset(itemset);
            itemset = new Itemset();
            itemset.setTimestamp(time);
        } else if (integers[i].equals("-2")) { // end of a sequence
            sequences.add(sequence);
        } else {
            // extract the value for an item
            Item item = itemFactory.getItem(Integer.parseInt(integers[i]));
            // record in which sequences this item appears, once per sequence
            if (counted.get(item) == null) {
                counted.put(item, Boolean.TRUE);
                BitSet appearances = frequentItems.get(item);
                if (appearances == null) {
                    appearances = new BitSet();
                    frequentItems.put(item, appearances);
                }
                appearances.set(sequence.getId());
            }
            itemset.addItem(item);
        }
    }
}
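// Example input, inferred from the parser above (the exact file format is an
// assumption): the token array for "<0> 1 2 -1 <5> 3 -1 -2" builds a sequence of
// two itemsets, {1, 2} at timestamp 0 and {3} at timestamp 5; the "-2" token then
// closes the sequence and adds it to the collection.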
/**
 * Add a new row to the database. If this is to be the first row added to the database, you
 * must have called setColumnNames() before.
 *
 * @param itemset the new row to be added to the data file
 * @exception IOException from library call
 * @exception DBException column names have not been set or an invalid item was contained in
 *     the itemset
 */
public void addRow(Itemset itemset) throws IOException, DBException {
    if (!wroteColumnNames) throw new DBException("Column names must be set first");

    int size = itemset.size();
    for (int i = 0; i < size; i++)
        if (itemset.get(i) > numColumns) throw new DBException("Attempt to write invalid item");

    if (needReposition) {
        outStream.seek(lastPosition);
        needReposition = false;
    }

    // write the row size followed by the items, updating the checksum as we go
    outStream.writeInt(size);
    CRC = updateCRC(CRC, size);
    int item;
    for (int i = 0; i < size; i++) {
        item = itemset.get(i);
        outStream.writeInt(item);
        CRC = updateCRC(CRC, item);
    }
    numRows++;
}
/**
 * Detects which leaf an itemset falls into.
 *
 * @param itemset the itemset
 * @return the leaf number
 */
public final int leafNum(Itemset itemset) {
    int lmNum = 0;
    if (!type) { // LEAF
        lmNum = lm;
    } else { // NODE: descend left or right of the split
        if (itemset.getValue(splitAttr) <= splitValue) {
            lmNum = leftNode.leafNum(itemset);
        } else {
            lmNum = rightNode.leafNum(itemset);
        }
    }
    return lmNum;
}
/**
 * Predicts the class value of an itemset by the tree.
 *
 * @param itemset the itemset
 * @param smooth if true, uses the smoothed model; otherwise uses the unsmoothed one
 * @return the predicted value
 */
public final double predict(Itemset itemset, boolean smooth) {
    double y = 0.0;

    if (!type) { // LEAF
        if (smooth) {
            y = smoothed.predict(itemset);
        } else {
            if (valueNode) { // the leaf stores a constant value
                y = unsmoothed.coeffs[0];
            } else {
                y = unsmoothed.predict(itemset);
            }
        }
    } else { // NODE: descend left or right of the split
        if (itemset.getValue(splitAttr) <= splitValue) {
            y = leftNode.predict(itemset, smooth);
        } else {
            y = rightNode.predict(itemset, smooth);
        }
    }
    return y;
}
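// Illustrative trace (the values are hypothetical): in a tree whose root splits on
// splitAttr = 2 at splitValue = 5.0, an itemset with getValue(2) == 3.0 descends
// into leftNode; the recursion ends at a leaf (type == false), which returns either
// its linear model's prediction or, for a valueNode, the constant coeffs[0].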
/**
 * It adds a rule to the rule base.
 *
 * @param itemset itemset to be added
 * @param time time of the rule
 */
public void add(Itemset itemset, long time) {
    int i;
    Item item;
    int[] antecedent = new int[n_variables];

    for (i = 0; i < n_variables; i++) antecedent[i] = -1; // don't care

    // translate the itemset into an antecedent over the variables
    for (i = 0; i < itemset.size(); i++) {
        item = itemset.get(i);
        antecedent[item.getVariable()] = item.getValue();
    }

    Rule r = new Rule(this.dataBase);
    r.asignaAntecedente(antecedent);
    r.setConsequent(itemset.getClas());
    r.setConfidence(itemset.getSupportClass() / itemset.getSupport());
    r.setSupport(itemset.getSupportClass());
    r.setTime(time);
    this.ruleBase.add(r);
}
private void generateL2(int clas) {
    int i, j;
    Item item;
    Itemset itemset;

    this.L2.clear();

    // Build every single-item itemset and keep those with enough class support.
    itemset = new Itemset(clas);
    for (i = 0; i < this.nVariables; i++) {
        if (this.dataBase.numLabels(i) > 1) {
            for (j = 0; j < this.dataBase.numLabels(i); j++) {
                item = new Item(i, j);
                itemset.add(item);
                itemset.calculateSupports(this.dataBase, this.train);
                if (itemset.getSupportClass()[0] >= this.minsup) this.L2.add(itemset.clone());
                itemset.remove(0);
            }
        }
    }
    this.generateRules(this.L2, clas);
}
/** sample usage and testing */
public static void main(String[] args) {
    Itemset is1 = new Itemset();
    is1.add(1);
    is1.add(2);
    Itemset is2 = new Itemset();
    is2.add(3);
    is2.add(2);
    Itemset is3 = new Itemset();
    is3.add(3);
    is3.add(1);
    Itemset is4 = new Itemset();
    is4.add(33);
    is4.add(3);

    ArrayList colNames = new ArrayList(3);
    colNames.add("cheese");
    colNames.add("pizza");
    colNames.add("beer");

    System.out.println("\n\nCreating invalid database:");
    try {
        RandomAccessFile invalid = new RandomAccessFile("invalid.db", "rw");
        invalid.writeChars(ID + " - a bogus file that looks like a valid one");
        invalid.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nCreating corrupted database:");
    try {
        DBWriter corrupted = new DBWriter("corrupted.db");
        try {
            corrupted.addRow(is1);
        } catch (DBException e) {
            System.out.println(e);
        }
        corrupted.setDescription("a corrupted database");
        corrupted.setColumnNames(colNames);
        corrupted.addRow(is1);
        corrupted.setDescription("a corrupted database - 2");
        corrupted.setColumnNames(colNames);
        corrupted.addRow(is2);
        corrupted.setDescription("a corrupted database - 3");
        corrupted.addRow(is3);
        try {
            corrupted.addRow(is4);
        } catch (DBException e) {
            System.out.println(e);
        }
        corrupted.close();

        System.out.println("corrupting file");
        RandomAccessFile raf = new RandomAccessFile("corrupted.db", "rw");
        raf.seek(770);
        // replace the 2 in the second itemset with a 3
        raf.writeInt(3);
        raf.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nCreating empty database:");
    try {
        DBWriter empty = new DBWriter("empty.db");
        empty.setDescription("an empty database");
        empty.setColumnNames(colNames);
        empty.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nCreating correct database:");
    try {
        DBWriter correct = new DBWriter("correct.db");
        correct.setDescription("a correct database");
        correct.setColumnNames(colNames);
        correct.addRow(is1);
        correct.setDescription("a correct database - 2");
        correct.setColumnNames(colNames);
        correct.addRow(is2);
        correct.setDescription("a correct database - 3");
        correct.addRow(is3);
        correct.close();

        correct = new DBWriter("correct.db");
        correct.setColumnNames(colNames);
        correct.addRow(is1);
        correct.setDescription("a correct database - 4");
        correct.setColumnNames(colNames);
        correct.addRow(is2);
        correct.setDescription("a correct database - 5");
        correct.addRow(is3);
        correct.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    System.out.println("\n\nOpening and closing DBWriter:");
    try {
        DBWriter bummer = new DBWriter("bummer.db");
        bummer.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }
}
/**
 * Find association rules in a database, given the set of frequent itemsets and a set of
 * restrictions.
 *
 * @param cacheReader the object used to read from the cache
 * @param minSupport the minimum support
 * @param minConfidence the minimum confidence
 * @param inAntecedent the items that must appear in the antecedent of each rule; if null then
 *     this constraint is ignored
 * @param inConsequent the items that must appear in the consequent of each rule; if null then
 *     this constraint is ignored
 * @param ignored the items that should be ignored; if null then this constraint is ignored
 * @param maxAntecedent the maximum number of items that can appear in the antecedent of each
 *     rule; if 0 then this constraint is ignored
 * @param minConsequent the minimum number of items that should appear in the consequent of
 *     each rule; if 0 then this constraint is ignored
 * @return a Vector containing all association rules found
 */
public Vector findAssociations(
    DBCacheReader cacheReader,
    float minSupport,
    float minConfidence,
    Itemset inAntecedent,
    Itemset inConsequent,
    Itemset ignored,
    int maxAntecedent,
    int minConsequent) {
    min_support = minSupport;
    min_confidence = minConfidence;
    is_in_antecedent = inAntecedent;
    is_in_consequent = inConsequent;
    is_ignored = ignored;
    max_antecedent = maxAntecedent;
    min_consequent = minConsequent;

    // create the vector where we'll put the rules
    rules = new Vector();

    // read the supports of the frequent itemsets from the cache
    initializeSupports(cacheReader);

    // get the frequent itemsets
    Vector frequent = supports.getItemsets();
    if (frequent.size() == 0) return rules;

    // if we need to ignore some items
    if (ignored != null) {
        // remove all frequent itemsets that contain items to be ignored; their
        // subsets that do not contain those items will remain
        for (int i = 0; i < frequent.size(); i++) {
            Itemset is = (Itemset) frequent.get(i);
            if (is.doesIntersect(ignored)) {
                // replace this element with the last, delete the last, and don't
                // advance the index
                frequent.set(i, frequent.lastElement());
                frequent.remove(frequent.size() - 1);
                i--;
            }
        }
        if (frequent.size() == 0) return rules;
    }

    // if we need to have some items in the antecedent or consequent
    if (inAntecedent != null || inConsequent != null) {
        // remove frequent itemsets that don't have the required items
        for (int i = 0; i < frequent.size(); i++) {
            Itemset is = (Itemset) frequent.get(i);
            if (inAntecedent != null && !inAntecedent.isIncludedIn(is)) {
                frequent.set(i, frequent.lastElement());
                frequent.remove(frequent.size() - 1);
                i--;
            } else if (inConsequent != null && !inConsequent.isIncludedIn(is)) {
                frequent.set(i, frequent.lastElement());
                frequent.remove(frequent.size() - 1);
                i--;
            }
        }
        if (frequent.size() == 0) return rules;
    }

    // generate rules from each frequent itemset
    for (int i = 0; i < frequent.size(); i++) {
        // get a frequent itemset
        Itemset is_frequent = (Itemset) frequent.get(i);

        // skip it if it's too small
        if (is_frequent.size() <= 1 || is_frequent.size() <= minConsequent) continue;

        // get all possible 1-item consequents
        Vector consequents = new Vector(is_frequent.size());
        for (int k = 0; k < is_frequent.size(); k++) {
            int item = is_frequent.getItem(k);
            Itemset is_consequent = new Itemset(1);
            is_consequent.addItem(item);

            // is_consequent now contains a possible consequent; verify that the
            // rule having this consequent satisfies our requirements
            Itemset is_antecedent = is_frequent.subtract(is_consequent);
            float antecedent_support = (float) 0.00001;
            try {
                antecedent_support = supports.getSupport(is_antecedent);
            } catch (SETException e) {
                System.err.println("Error getting support from SET!!!\n" + e);
            }
            float confidence = is_frequent.getSupport() / antecedent_support;

            if (confidence >= min_confidence) {
                consequents.add(is_consequent);

                // check whether it also satisfies our constraints
                boolean approved = true;
                if (approved && is_in_antecedent != null
                    && !is_in_antecedent.isIncludedIn(is_antecedent)) approved = false;
                if (approved && is_in_consequent != null
                    && !is_in_consequent.isIncludedIn(is_consequent)) approved = false;
                if (approved && max_antecedent > 0
                    && is_antecedent.size() > max_antecedent) approved = false;
                if (approved && min_consequent > 0
                    && is_consequent.size() < min_consequent) approved = false;

                // if the rule satisfies all requirements then we add it to the
                // rules collection
                if (approved)
                    rules.add(new AssociationRule(
                        is_antecedent, is_consequent, is_frequent.getSupport(), confidence));
            }
        }

        // call the ap-genrules procedure to generate all rules out of this
        // frequent itemset
        ap_genrules_constraint(is_frequent, consequents);
    }
    return rules;
}
/** It launches the algorithm */
public void execute() {
    if (somethingWrong) {
        // We do not execute the program
        System.err.println("An error was found");
        System.err.println("Aborting the program");
        // We should not use the statement System.exit(-1);
    } else {
        this.proc = new AlcalaetalProcess(
            this.trans,
            this.nEvaluations,
            this.popSize,
            this.nBitsGene,
            this.phi,
            this.d,
            this.nFuzzyRegionsForNumericAttributes,
            this.useMaxForOneFrequentItemsets,
            this.minSupport,
            this.minConfidence);
        this.proc.run();
        this.associationRulesSet = this.proc.getRulesSet();
        this.proc.printReport(this.associationRulesSet);

        try {
            int r, i;
            AssociationRule ar;
            Itemset itemset;

            this.saveFuzzyAttributes(
                this.uniformFuzzyAttributesFilename, this.proc.getUniformFuzzyAttributes());
            this.saveFuzzyAttributes(
                this.adjustedFuzzyAttributesFilename, this.proc.getAdjustedFuzzyAttributes());
            this.saveGeneticLearningLog(
                this.geneticLearningLogFilename, this.proc.getGeneticLearningLog());

            PrintWriter rules_writer = new PrintWriter(this.rulesFilename);
            PrintWriter values_writer = new PrintWriter(this.valuesFilename);

            rules_writer.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            rules_writer.println("<rules>");
            values_writer.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            values_writer.print("<values ");
            values_writer.println(
                "n_one_frequent_itemsets=\"" + this.proc.getNumberOfOneFrequentItemsets()
                    + "\" n_rules=\"" + this.associationRulesSet.size() + "\">");

            for (r = 0; r < this.associationRulesSet.size(); r++) {
                ar = this.associationRulesSet.get(r);

                rules_writer.println("<rule id = \"" + r + "\" />");
                values_writer.println(
                    "<rule id=\"" + r + "\" rule_support=\"" + ar.getRuleSupport()
                        + "\" antecedent_support=\"" + ar.getAntecedentSupport()
                        + "\" confidence=\"" + ar.getConfidence() + "\"/>");

                rules_writer.println("<antecedents>");
                itemset = ar.getAntecedent();
                for (i = 0; i < itemset.size(); i++)
                    this.createRule(itemset.get(i), this.proc.getAdjustedFuzzyAttributes(), rules_writer);
                rules_writer.println("</antecedents>");

                rules_writer.println("<consequents>");
                itemset = ar.getConsequent();
                for (i = 0; i < itemset.size(); i++)
                    this.createRule(itemset.get(i), this.proc.getAdjustedFuzzyAttributes(), rules_writer);
                rules_writer.println("</consequents>");

                rules_writer.println("</rule>");
            }

            rules_writer.println("</rules>");
            values_writer.println("</values>");
            rules_writer.close();
            values_writer.close();

            System.out.println("\nAlgorithm Finished");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}
/**
 * This method creates a copy of the sequence and adds a given item to the last itemset of
 * the sequence. It sets the support of the sequence as the support of the item.
 *
 * @param prefix the sequence
 * @param item the item
 * @return the new sequence
 */
private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, String item) {
    SequentialPattern newPrefix = prefix.cloneSequence();
    // add the item to the last itemset of the cloned sequence
    Itemset itemset = newPrefix.get(newPrefix.size() - 1);
    itemset.addItem(item);
    return newPrefix;
}
/**
 * Function to add one itemset.
 *
 * @param itemset The itemset to add to the dataset.
 */
public final void addItemset(Itemset itemset) {
    Itemset newItemset = (Itemset) itemset.copy();
    newItemset.setDataset(this);
    itemsets.addElement(newItemset);
}