// Determine F(k+1) by support counting on (C(K+1), T) and retaining itemsets from C(k+1) with // support at least minsup private static List<Itemset> determineFrequentItemsets( List<Itemset> candicates, List<Transaction> transactions, double minsup) { if (candicates.isEmpty()) { return null; } HashTree hashTree = new HashTree(candicates, candicates.get(0).getNumOfItems()); HashMap<Itemset, Integer> frequentCount = new HashMap<>(); for (Itemset itemset : candicates) { frequentCount.put(itemset, 0); } for (Transaction transaction : transactions) { Set<Itemset> candidatesInTranscation = hashTree.candidatesInTransaction(transaction); if (candidatesInTranscation == null) { continue; } for (Itemset itemset : candidatesInTranscation) { if (transaction.containItemset(itemset)) { frequentCount.put(itemset, frequentCount.get(itemset) + 1); } } } List<Itemset> result = new ArrayList<>(); for (Itemset itemset : candicates) { if ((double) (frequentCount.get(itemset)) / transactions.size() >= minsup) { result.add(itemset); } } return result; }
public static void main(String[] args) { // Minimum support value double minsup = 0.144; // double minsup = 0.04; // Load data, you should set your own data file location here File data = new File("/Users/walker/Desktop/Courses/DataMining/Assignments/2/assignment2-data.txt"); ArrayList<Transaction> transactions = new ArrayList<>(); ArrayList<String> items = new ArrayList<>(); try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(data), "utf-8"))) { String line; // Load item's name if ((line = br.readLine()) != null) { Collections.addAll(items, line.split("\\s+")); } // Load transactions Transaction transaction; while ((line = br.readLine()) != null) { transaction = new Transaction(line); transactions.add(transaction); } } catch (IOException e) { e.printStackTrace(); } List<Itemset> frequentItemset = apriori(transactions, minsup); HashMap<Itemset, Integer> frequentCount = new HashMap<>(); for (Itemset itemset : frequentItemset) { frequentCount.put(itemset, 0); } for (Transaction transaction : transactions) { for (Itemset itemset : frequentItemset) { if (transaction.containItemset(itemset)) { frequentCount.put(itemset, frequentCount.get(itemset) + 1); } } } for (Itemset itemset : frequentItemset) { System.out.print(itemset); System.out.println((double) frequentCount.get(itemset) / transactions.size()); } }