// Generate C(k+1) by join itemset-pairs in F(k) private static List<Itemset> generateCandidates(List<Itemset> frequentItemsets) { if (frequentItemsets.isEmpty() || frequentItemsets.size() == 1) { return new ArrayList<>(); } Collections.sort(frequentItemsets); List<Itemset> candidates = new ArrayList<>(); Itemset candidate; for (int i = 0, j = 1; i != frequentItemsets.size(); ) { while (j != frequentItemsets.size() && Itemset.generateCandidate(frequentItemsets.get(i), frequentItemsets.get(j)) != null) { ++j; } for (int k = i; k != j; ++k) { for (int l = k + 1; l != j; ++l) { Itemset itemset = (Itemset.generateCandidate(frequentItemsets.get(k), frequentItemsets.get(l))); assert itemset != null; candidates.add(itemset); } } i = j; j++; } return candidates; }
// Prune itemsets from C(k+1) that violate downward closure private static List<Itemset> prune(List<Itemset> candidates, List<Itemset> frequentItemsets) { List<Itemset> prunedCandicates = new ArrayList<>(); for (Itemset candidate : candidates) { if (frequentItemsets.containsAll(candidate.downwardClosure())) { prunedCandicates.add(candidate); } } return prunedCandicates; }
/** * It adds a sequence from an array of string that we have to interpret * * @param integers * @param sequenceID */ public void addSequence(String[] integers, int sequenceID) { long timestamp = -1; Sequence sequence = new Sequence(sequences.size()); sequence.setID(sequenceID); Itemset itemset = new Itemset(); int inicio = 0; Map<Item, Boolean> counted = new HashMap<Item, Boolean>(); for (int i = inicio; i < integers.length; i++) { if (integers[i].codePointAt(0) == '<') { // Timestamp String value = integers[i].substring(1, integers[i].length() - 1); timestamp = Long.parseLong(value); itemset.setTimestamp(timestamp); } else if (integers[i].equals("-1")) { // end of an itemset long time = itemset.getTimestamp() + 1; sequence.addItemset(itemset); itemset = new Itemset(); itemset.setTimestamp(time); } else if (integers[i].equals("-2")) { // end of a sequence sequences.add(sequence); } else { // extract the value for an item Item item = itemFactory.getItem(Integer.parseInt(integers[i])); if (counted.get(item) == null) { counted.put(item, Boolean.TRUE); BitSet appearances = frequentItems.get(item); if (appearances == null) { appearances = new BitSet(); frequentItems.put(item, appearances); } appearances.set(sequence.getId()); } itemset.addItem(item); } } }
/**
 * Adds a new row to the database. If this is to be the first row added to the database you must
 * have called setColumnNames() before.
 *
 * <p>All items are validated before anything is written. An item is valid when it lies in the
 * range {@code 1..numColumns}; {@code numColumns} itself is accepted, so item ids are 1-based and
 * values below 1 are rejected as well. If a reposition is pending, the output is first moved to
 * {@code lastPosition}. The row is then written as its size followed by each item, every value
 * being written with {@code writeInt} and folded into the running CRC, and the row count is
 * incremented.
 *
 * @param itemset the new row to be added to the data file
 * @throws IOException from library call
 * @throws DBException column names have not been set or an invalid item was contained in the
 *     itemset
 */
public void addRow(Itemset itemset) throws IOException, DBException {
    if (!wroteColumnNames) {
        throw new DBException("Column names must be set first");
    }

    // Validate the whole row up front so a bad item never leaves a partial row in the file.
    int size = itemset.size();
    for (int i = 0; i < size; i++) {
        int candidate = itemset.get(i);
        if (candidate < 1 || candidate > numColumns) {
            throw new DBException("Attempt to write invalid item");
        }
    }

    if (needReposition) {
        outStream.seek(lastPosition);
        needReposition = false;
    }

    outStream.writeInt(size);
    CRC = updateCRC(CRC, size);
    for (int i = 0; i < size; i++) {
        int item = itemset.get(i);
        outStream.writeInt(item);
        CRC = updateCRC(CRC, item);
    }

    numRows++;
}
/**
 * Sample usage and manual test driver.
 *
 * <p>Writes several files into the working directory: {@code invalid.db}, {@code corrupted.db},
 * {@code empty.db}, {@code correct.db} and {@code bummer.db}. Unexpected exceptions are reported
 * on standard output rather than propagated.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    // Rows used throughout the scenarios below.
    Itemset rowOneTwo = new Itemset();
    rowOneTwo.add(1);
    rowOneTwo.add(2);

    Itemset rowThreeTwo = new Itemset();
    rowThreeTwo.add(3);
    rowThreeTwo.add(2);

    Itemset rowThreeOne = new Itemset();
    rowThreeOne.add(3);
    rowThreeOne.add(1);

    // Contains 33, which exceeds the three columns declared below.
    Itemset rowOutOfRange = new Itemset();
    rowOutOfRange.add(33);
    rowOutOfRange.add(3);

    ArrayList columns = new ArrayList(3);
    columns.add("cheese");
    columns.add("pizza");
    columns.add("beer");

    // Scenario 1: a file that starts with the ID string but is otherwise bogus.
    System.out.println("\n\nCreating invalid database:");
    try {
        RandomAccessFile bogusFile = new RandomAccessFile("invalid.db", "rw");
        bogusFile.writeChars(ID + " - a bogus file that looks like a valid one");
        bogusFile.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 2: a database written normally and then damaged on disk.
    System.out.println("\n\nCreating corrupted database:");
    try {
        DBWriter damaged = new DBWriter("corrupted.db");

        // Adding a row before the column names are set; a DBException is printed if thrown.
        try {
            damaged.addRow(rowOneTwo);
        } catch (DBException e) {
            System.out.println(e);
        }

        damaged.setDescription("a corrupted database");
        damaged.setColumnNames(columns);
        damaged.addRow(rowOneTwo);
        damaged.setDescription("a corrupted database - 2");
        damaged.setColumnNames(columns);
        damaged.addRow(rowThreeTwo);
        damaged.setDescription("a corrupted database - 3");
        damaged.addRow(rowThreeOne);

        // Adding a row with an out-of-range item; a DBException is printed if thrown.
        try {
            damaged.addRow(rowOutOfRange);
        } catch (DBException e) {
            System.out.println(e);
        }
        damaged.close();

        System.out.println("corrupting file");
        RandomAccessFile patcher = new RandomAccessFile("corrupted.db", "rw");
        // Hard-coded byte offset; per the original author this replaces the 2 in the
        // second itemset with a 3.
        patcher.seek(770);
        patcher.writeInt(3);
        patcher.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 3: description and column names only, no rows.
    System.out.println("\n\nCreating empty database:");
    try {
        DBWriter rowless = new DBWriter("empty.db");
        rowless.setDescription("an empty database");
        rowless.setColumnNames(columns);
        rowless.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 4: a valid database, written once and then written again under the same name.
    System.out.println("\n\nCreating correct database:");
    try {
        DBWriter firstPass = new DBWriter("correct.db");
        firstPass.setDescription("a correct database");
        firstPass.setColumnNames(columns);
        firstPass.addRow(rowOneTwo);
        firstPass.setDescription("a correct database - 2");
        firstPass.setColumnNames(columns);
        firstPass.addRow(rowThreeTwo);
        firstPass.setDescription("a correct database - 3");
        firstPass.addRow(rowThreeOne);
        firstPass.close();

        DBWriter secondPass = new DBWriter("correct.db");
        secondPass.setColumnNames(columns);
        secondPass.addRow(rowOneTwo);
        secondPass.setDescription("a correct database - 4");
        secondPass.setColumnNames(columns);
        secondPass.addRow(rowThreeTwo);
        secondPass.setDescription("a correct database - 5");
        secondPass.addRow(rowThreeOne);
        secondPass.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }

    // Scenario 5: open a writer and close it immediately without writing anything.
    System.out.println("\n\nOpening and closing DBWriter:");
    try {
        DBWriter untouched = new DBWriter("bummer.db");
        untouched.close();
    } catch (Exception e) {
        System.out.println("Shouldn't have happened: " + e);
    }
}