// this is the apriori_gen procedure that generates starting from // a k-itemset collection a new collection of (k+1)-itemsets. private Vector apriori_gen(Vector itemsets) { if (itemsets.size() == 0) return new Vector(0); // create a hashtree so that we can check more efficiently the // number of subsets // this may not really be necessary when generating rules since // itemsets will probably be a small collection, but just in case HashTree ht_itemsets = new HashTree(itemsets); for (int i = 0; i < itemsets.size(); i++) ht_itemsets.add(i); ht_itemsets.prepareForDescent(); Vector result = new Vector(); Itemset is_i, is_j; for (int i = 0; i < itemsets.size() - 1; i++) for (int j = i + 1; j < itemsets.size(); j++) { is_i = (Itemset) itemsets.get(i); is_j = (Itemset) itemsets.get(j); // if we cannot combine element i with j then we shouldn't // waste time for bigger j's. This is because we keep the // collections ordered, an important detail in this implementation if (!is_i.canCombineWith(is_j)) break; else { Itemset is = is_i.combineWith(is_j); // a real k-itemset has k (k-1)-subsets // so we test that this holds before adding to result if (ht_itemsets.countSubsets(is) == is.size()) result.add(is); } } return result; }