// Generate C(k+1) by joining pairs of itemsets in F(k).
  //
  // The list passed in is sorted in place. After sorting, it is walked as consecutive runs: a run
  // starts at some index and extends for as long as its first itemset can be joined with the next
  // one. Every pair inside a run contributes one candidate. Fewer than two itemsets yield an
  // empty (mutable) result.
  private static List<Itemset> generateCandidates(List<Itemset> frequentItemsets) {
    // A pair needs at least two itemsets.
    if (frequentItemsets.size() < 2) {
      return new ArrayList<>();
    }

    Collections.sort(frequentItemsets);

    final int total = frequentItemsets.size();
    List<Itemset> candidates = new ArrayList<>();

    int runStart = 0;
    while (runStart < total) {
      // Grow the run [runStart, runEnd) while the run's first itemset still joins with the next.
      Itemset head = frequentItemsets.get(runStart);
      int runEnd = runStart + 1;
      while (runEnd < total
          && Itemset.generateCandidate(head, frequentItemsets.get(runEnd)) != null) {
        runEnd++;
      }

      // Join every unordered pair inside the run.
      for (int first = runStart; first < runEnd; first++) {
        Itemset left = frequentItemsets.get(first);
        for (int second = first + 1; second < runEnd; second++) {
          Itemset joined = Itemset.generateCandidate(left, frequentItemsets.get(second));
          // Only the run's head was checked against each member above; this relies on the join
          // succeeding for every pair within the run.
          assert joined != null;
          candidates.add(joined);
        }
      }

      // The next run begins where this one stopped.
      runStart = runEnd;
    }

    return candidates;
  }
 // Prune itemsets from C(k+1) that violate downward closure.
 //
 // A candidate is kept only when every element returned by its downwardClosure() is contained in
 // frequentItemsets. The result is a new list; neither argument is modified here.
 private static List<Itemset> prune(List<Itemset> candidates, List<Itemset> frequentItemsets) {
   List<Itemset> survivors = new ArrayList<>();
   for (Itemset candidate : candidates) {
     boolean closureIsFrequent = frequentItemsets.containsAll(candidate.downwardClosure());
     if (!closureIsFrequent) {
       continue;
     }
     survivors.add(candidate);
   }
   return survivors;
 }
  /**
   * Builds a sequence from an array of string tokens and interprets each token as follows:
   *
   * <ul>
   *   <li>a token whose first character is {@code '<'}: the text between its first and last
   *       character is parsed as a {@code long} and set as the timestamp of the current itemset;
   *   <li>{@code "-1"}: the current itemset is appended to the sequence and a new itemset is
   *       started whose timestamp is the previous one plus 1;
   *   <li>{@code "-2"}: the sequence is added to {@code sequences};
   *   <li>anything else: parsed as an {@code int}, resolved through {@code itemFactory} and added
   *       to the current itemset. The first time an item is seen within this call, the bit at
   *       the sequence's id is set in that item's entry of {@code frequentItems}.
   * </ul>
   *
   * @param integers the tokens describing the sequence
   * @param sequenceID the id assigned to the new sequence
   */
  public void addSequence(String[] integers, int sequenceID) {
    Sequence sequence = new Sequence(sequences.size());
    sequence.setID(sequenceID);
    Itemset itemset = new Itemset();
    // Items already seen in this call, so each item marks the sequence at most once.
    Map<Item, Boolean> counted = new HashMap<Item, Boolean>();

    for (String token : integers) {
      if (token.codePointAt(0) == '<') {
        // Timestamp token: strip the first and last character and parse what is left.
        String value = token.substring(1, token.length() - 1);
        itemset.setTimestamp(Long.parseLong(value));
        continue;
      }

      if (token.equals("-1")) {
        // End of an itemset: store it and open the next one at the following timestamp.
        long nextTimestamp = itemset.getTimestamp() + 1;
        sequence.addItemset(itemset);
        itemset = new Itemset();
        itemset.setTimestamp(nextTimestamp);
        continue;
      }

      if (token.equals("-2")) {
        // End of the sequence.
        sequences.add(sequence);
        continue;
      }

      // Any other token is the integer value of an item.
      Item item = itemFactory.getItem(Integer.parseInt(token));
      if (!counted.containsKey(item)) {
        counted.put(item, Boolean.TRUE);
        BitSet appearances = frequentItems.get(item);
        if (appearances == null) {
          appearances = new BitSet();
          frequentItems.put(item, appearances);
        }
        appearances.set(sequence.getId());
      }
      itemset.addItem(item);
    }
  }
// Example #4
// 0
  /**
   * Appends a new row to the database. If this is to be the first row added to the database,
   * setColumnNames() must have been called beforehand.
   *
   * <p>All items are validated before anything is written. The row is then written as its size
   * followed by each item, every value being written with {@code writeInt} and folded into the
   * running CRC. Finally the row counter is incremented.
   *
   * @param itemset the new row to be added to the data file
   * @throws IOException from library call
   * @throws DBException column names have not been set or an invalid item was contained in the
   *     itemset
   */
  public void addRow(Itemset itemset) throws IOException, DBException {
    if (!wroteColumnNames) {
      throw new DBException("Column names must be set first");
    }

    final int size = itemset.size();

    // Reject the whole row up front so that a bad item never leaves a partial row in the file.
    // NOTE(review): only the upper bound is checked; items <= 0 pass this validation — confirm
    // whether that is intended.
    for (int index = 0; index < size; index++) {
      if (itemset.get(index) > numColumns) {
        throw new DBException("Attempt to write invalid item");
      }
    }

    // Move the stream back to the remembered position when a reposition was requested.
    if (needReposition) {
      outStream.seek(lastPosition);
      needReposition = false;
    }

    // Row layout: size first, then the items in order.
    outStream.writeInt(size);
    CRC = updateCRC(CRC, size);

    for (int index = 0; index < size; index++) {
      final int item = itemset.get(index);
      outStream.writeInt(item);
      CRC = updateCRC(CRC, item);
    }

    numRows++;
  }
// Example #5
// 0
  /**
   * Sample usage and manual test driver.
   *
   * <p>Builds four two-item itemsets and a three-entry column-name list, then runs five
   * scenarios that work with the names {@code invalid.db}, {@code corrupted.db}, {@code empty.db},
   * {@code correct.db} and {@code bummer.db}. Unexpected exceptions are not propagated; each
   * scenario prints them to standard output prefixed with "Shouldn't have happened: ".
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    // Four two-item itemsets used as rows. is4 contains 33, which is larger than the number of
    // column names registered below (3).
    Itemset is1 = new Itemset();
    is1.add(1);
    is1.add(2);
    Itemset is2 = new Itemset();
    is2.add(3);
    is2.add(2);
    Itemset is3 = new Itemset();
    is3.add(3);
    is3.add(1);
    Itemset is4 = new Itemset();
    is4.add(33);
    is4.add(3);

    // Three column names, held in a raw (untyped) ArrayList.
    ArrayList colNames = new ArrayList(3);
    colNames.add("cheese");
    colNames.add("pizza");
    colNames.add("beer");

    // Scenario 1: a file written directly with RandomAccessFile (no DBWriter involved) that
    // starts with ID followed by filler text.
    System.out.println("\n\nCreating invalid database:");
    try {
      RandomAccessFile invalid = new RandomAccessFile("invalid.db", "rw");
      invalid.writeChars(ID + " - a bogus file that looks like a valid one");
      invalid.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 2: write a database through DBWriter, then overwrite part of the file by hand.
    System.out.println("\n\nCreating corrupted database:");
    try {
      DBWriter corrupted = new DBWriter("corrupted.db");

      // addRow is called before any column names were set; a DBException here is caught and
      // printed (presumably the expected outcome — confirm against DBWriter.addRow).
      try {
        corrupted.addRow(is1);
      } catch (DBException e) {
        System.out.println(e);
      }

      corrupted.setDescription("a corrupted database");
      corrupted.setColumnNames(colNames);

      // Rows interleaved with repeated setDescription/setColumnNames calls.
      corrupted.addRow(is1);
      corrupted.setDescription("a corrupted database - 2");
      corrupted.setColumnNames(colNames);
      corrupted.addRow(is2);
      corrupted.setDescription("a corrupted database - 3");
      corrupted.addRow(is3);

      // is4 contains item 33 while only 3 column names were set; a DBException here is caught
      // and printed (presumably the expected outcome — confirm).
      try {
        corrupted.addRow(is4);
      } catch (DBException e) {
        System.out.println(e);
      }

      corrupted.close();

      System.out.println("corrupting file");

      // NOTE(review): 770 is a hard-coded byte offset; it presumably depends on the exact file
      // layout produced by the DBWriter calls above — confirm before changing what is written.
      RandomAccessFile raf = new RandomAccessFile("corrupted.db", "rw");
      raf.seek(770);
      // replace the 2 in the second itemset with a 3
      raf.writeInt(3);
      raf.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 3: description and column names only, no rows.
    System.out.println("\n\nCreating empty database:");
    try {
      DBWriter empty = new DBWriter("empty.db");

      empty.setDescription("an empty database");
      empty.setColumnNames(colNames);
      empty.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 4: same call pattern as scenario 2 but without the early addRow, the is4 row or
    // the manual overwrite; afterwards the same name is opened again with a fresh DBWriter.
    System.out.println("\n\nCreating correct database:");
    try {
      DBWriter correct = new DBWriter("correct.db");

      correct.setDescription("a correct database");
      correct.setColumnNames(colNames);

      correct.addRow(is1);
      correct.setDescription("a correct database - 2");
      correct.setColumnNames(colNames);
      correct.addRow(is2);
      correct.setDescription("a correct database - 3");
      correct.addRow(is3);

      correct.close();

      // Second pass over the same name; this time no description is set before the first row.
      correct = new DBWriter("correct.db");

      correct.setColumnNames(colNames);

      correct.addRow(is1);
      correct.setDescription("a correct database - 4");
      correct.setColumnNames(colNames);
      correct.addRow(is2);
      correct.setDescription("a correct database - 5");
      correct.addRow(is3);

      correct.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 5: open a DBWriter and close it straight away without setting anything.
    System.out.println("\n\nOpening and closing DBWriter:");
    try {
      DBWriter bummer = new DBWriter("bummer.db");
      bummer.close();
    } catch (Exception e) {
      System.out.println("Shouldn't have happened: " + e);
    }
  }