예제 #1
0
  /**
   * Example entry point: loads a transaction database from a file and runs {@code AlgoCORI} on
   * it with fixed minimum-support and minimum-bond thresholds, then prints the run statistics.
   *
   * @param arg command-line arguments (unused)
   * @throws IOException if the input database cannot be read, or if the algorithm fails while
   *     doing I/O
   */
  public static void main(String[] arg) throws IOException {

    // the file paths
    String input = fileToPath("contextPasquier99.txt"); // the database
    String output = ".//output.txt"; // the path for saving the itemsets found

    // minimum support, expressed as a relative value.
    // NOTE(review): the original comment claimed this "means 2 transactions";
    // confirm against the number of transactions in the input file.
    double minsup = 0.8;
    // minimum bond threshold
    double minbond = 0.2;

    // Loading the transaction database.
    // A load failure is deliberately NOT caught here: running the algorithm on a
    // database that failed to load would only produce a misleading output file.
    TransactionDatabase database = new TransactionDatabase();
    database.loadFile(input);

    // Applying the CORI algorithm
    AlgoCORI algo = new AlgoCORI();
    // Per the original note, passing "true" as the last argument makes the algorithm
    // use a triangular matrix for counting the support of itemsets of size 2, which
    // should be faster on some datasets.
    algo.runAlgorithm(output, database, minsup, minbond, false);

    algo.printStats();
  }
  /**
   * Performs one pass over the database and builds, for every single item, its diffset and its
   * support.
   *
   * <p>When an item is first encountered, its bitset gets the range {@code [0, database.size())}
   * set to true; the bit of every transaction that contains the item is then set to false, and
   * the item's support counter is incremented once per occurrence.
   *
   * @param database the transaction database
   * @param mapItemTIDS a map that receives, for each item, its {@code BitSetSupport} (diffset
   *     bits plus support count); entries already present are reused
   * @return the largest item id added to the map by this call, or 0 if none was larger
   */
  int calculateSupportSingleItems(
      TransactionDatabase database, final Map<Integer, BitSetSupport> mapItemTIDS) {
    // The size is read once: it does not change while we only read transactions.
    final int transactionCount = database.size();
    int largestItem = 0;

    // Single database pass: visit every transaction by its id.
    for (int tid = 0; tid < transactionCount; tid++) {
      for (Integer item : database.getTransactions().get(tid)) {
        BitSetSupport diffset = mapItemTIDS.get(item);
        if (diffset == null) {
          // First occurrence of this item: start from "absent from no transaction
          // ruled out yet", i.e. every bit true, and register it in the map.
          diffset = new BitSetSupport();
          diffset.bitset.set(0, transactionCount, true);
          mapItemTIDS.put(item, diffset);
          // An item is new exactly once, so tracking the maximum here is enough.
          largestItem = Math.max(largestItem, item);
        }
        // The item occurs in this transaction, so the transaction leaves its diffset...
        diffset.bitset.set(tid, false);
        // ...and counts toward its support.
        diffset.support++;
      }
    }
    return largestItem;
  }
  /**
   * Example entry point: loads a transaction database from a file and runs
   * {@code AlgoEclat_Bitset} on it with a fixed relative minimum support, then prints the run
   * statistics.
   *
   * @param arg command-line arguments (unused)
   * @throws IOException if the input database cannot be read, or if the algorithm fails while
   *     doing I/O
   */
  public static void main(String[] arg) throws IOException {
    // the file paths
    String input = fileToPath("contextPasquier99.txt"); // the database
    String output = ".//output.txt"; // the path for saving the frequent itemsets found

    // minimum support, expressed as a relative value.
    // NOTE(review): the original comment says this corresponds to 2 transactions;
    // confirm against the number of transactions in the input file.
    double minsup = 0.4;

    // Loading the transaction database.
    // A load failure is deliberately NOT caught here: running the algorithm on a
    // database that failed to load would only produce a misleading output file.
    TransactionDatabase database = new TransactionDatabase();
    database.loadFile(input);

    // Applying the ECLAT algorithm (bitset version)
    AlgoEclat_Bitset algo = new AlgoEclat_Bitset();
    // NOTE 1: per the original note, passing "true" as the last argument makes the
    // algorithm use a triangular matrix for counting the support of itemsets of
    // size 2, which should be faster on some datasets.
    algo.runAlgorithm(output, database, minsup, false);

    algo.printStats();
  }