예제 #1
0
  /**
   * Determines F(k+1): support-counts the candidates C(k+1) against the transactions T and keeps
   * the candidates whose support is at least {@code minsup}.
   *
   * @param candicates candidate itemsets C(k+1); the item count of the first candidate is handed
   *     to the hash tree
   * @param transactions transactions T over which support is counted
   * @param minsup minimum support, compared against (count / number of transactions)
   * @return the qualifying candidates in their original order, or {@code null} when there are no
   *     candidates at all
   */
  private static List<Itemset> determineFrequentItemsets(
      List<Itemset> candicates, List<Transaction> transactions, double minsup) {
    if (candicates.isEmpty()) {
      return null;
    }

    int itemsPerCandidate = candicates.get(0).getNumOfItems();
    HashTree tree = new HashTree(candicates, itemsPerCandidate);

    // Seed every candidate with a support count of zero.
    HashMap<Itemset, Integer> supportCounts = new HashMap<>();
    for (Itemset candidate : candicates) {
      supportCounts.put(candidate, 0);
    }

    for (Transaction current : transactions) {
      Set<Itemset> matches = tree.candidatesInTransaction(current);
      if (matches != null) {
        for (Itemset match : matches) {
          // The tree's answer is re-checked against the transaction before it is counted.
          if (!current.containItemset(match)) {
            continue;
          }
          int seen = supportCounts.get(match);
          supportCounts.put(match, seen + 1);
        }
      }
    }

    List<Itemset> frequent = new ArrayList<>();
    int totalTransactions = transactions.size();
    for (Itemset candidate : candicates) {
      int count = supportCounts.get(candidate);
      double support = (double) count / totalTransactions;
      if (support >= minsup) {
        frequent.add(candidate);
      }
    }
    return frequent;
  }
예제 #2
0
  /**
   * Loads a transaction file, mines its frequent itemsets with {@code apriori} and prints every
   * frequent itemset followed by its support (matching transactions / all transactions).
   *
   * <p>The first line of the file is treated as a header of item names and skipped; each
   * following line is passed to the {@code Transaction} constructor.
   *
   * @param args optional overrides: {@code args[0]} is the data file path and {@code args[1]} is
   *     the minimum support; the built-in defaults are used for whatever is omitted
   */
  public static void main(String[] args) {

    // Minimum support value; the second command-line argument overrides it when present.
    double minsup = 0.144;
    if (args != null && args.length > 1) {
      try {
        minsup = Double.parseDouble(args[1]);
      } catch (NumberFormatException e) {
        System.err.println("Invalid minimum support: " + args[1]);
        return;
      }
    }

    // Data file location; the first command-line argument overrides the built-in default.
    File data =
        new File(
            args != null && args.length > 0
                ? args[0]
                : "/Users/walker/Desktop/Courses/DataMining/Assignments/2/assignment2-data.txt");

    ArrayList<Transaction> transactions = new ArrayList<>();
    try (BufferedReader br =
        new BufferedReader(
            new InputStreamReader(
                new FileInputStream(data), java.nio.charset.StandardCharsets.UTF_8))) {
      // The first line holds the item names; it is a header, not a transaction, so consume it.
      br.readLine();

      // Load transactions, one per remaining line.
      String line;
      while ((line = br.readLine()) != null) {
        transactions.add(new Transaction(line));
      }
    } catch (IOException e) {
      // Mining a missing or partially read file would print misleading results, so stop here.
      System.err.println("Failed to read " + data + ": " + e);
      return;
    }

    List<Itemset> frequentItemset = apriori(transactions, minsup);

    // Re-count how many transactions contain each frequent itemset so its support can be printed.
    HashMap<Itemset, Integer> frequentCount = new HashMap<>();
    for (Itemset itemset : frequentItemset) {
      frequentCount.put(itemset, 0);
    }
    for (Transaction transaction : transactions) {
      for (Itemset itemset : frequentItemset) {
        if (transaction.containItemset(itemset)) {
          frequentCount.put(itemset, frequentCount.get(itemset) + 1);
        }
      }
    }

    for (Itemset itemset : frequentItemset) {
      System.out.print(itemset);
      System.out.println((double) frequentCount.get(itemset) / transactions.size());
    }
  }