/** * Method to find all frequent items in a projected sequence database * * @param sequences the set of sequences * @return A list of pairs, where a pair is an item with (1) booleans indicating if it is in an * itemset that is "cut" at left or right (prefix or postfix) and (2) the sequence IDs where * it occurs. */ protected Set<PairBIDE> findAllFrequentPairs( SequentialPattern prefix, List<PseudoSequenceBIDE> sequences) { // We use a Map the store the pairs. Map<PairBIDE, PairBIDE> mapPairs = new HashMap<PairBIDE, PairBIDE>(); // for each sequence for (PseudoSequenceBIDE sequence : sequences) { // for each itemset for (int i = 0; i < sequence.size(); i++) { // for each item for (int j = 0; j < sequence.getSizeOfItemsetAt(i); j++) { Integer item = sequence.getItemAtInItemsetAt(j, i); // create the pair corresponding to this item PairBIDE pair = new PairBIDE(sequence.isCutAtRight(i), sequence.isPostfix(i), item); // register this sequenceID for that pair. addPairWithoutCheck(mapPairs, sequence.getId(), pair); } } } // check the memory usage MemoryLogger.getInstance().checkMemory(); return mapPairs.keySet(); // return the pairs. }
/** * Method to update the support count of item in a maximum period * * @param prefix the current prefix * @param mapPaires * @param maximum periods a maximum period * @return a set of pairs indicating the support of items (note that a pair distinguish between * items in a postfix, prefix...). */ protected boolean findAllFrequentPairsForBackwardExtensionCheck( int seqProcessedCount, SequentialPattern prefix, PseudoSequenceBIDE maximumPeriod, int iPeriod, Map<PairBIDE, PairBIDE> mapPaires, Integer itemI, Integer itemIm1) { int supportToMatch = prefix.getSequenceIDs().size(); int maxPeriodSize = maximumPeriod.size(); // for each itemset in that period for (int i = 0; i < maxPeriodSize; i++) { int sizeOfItemsetAtI = maximumPeriod.getSizeOfItemsetAt(i); // NEW boolean sawI = false; // sawI after current position boolean sawIm1 = false; // sawI-1 before current position // END NEW // NEW march 20 2010 : check if I is after current position in current itemset for (int j = 0; j < sizeOfItemsetAtI; j++) { Integer item = maximumPeriod.getItemAtInItemsetAt(j, i); if (item.equals(itemI)) { sawI = true; } else if (item > itemI) { break; } } // END NEW for (int j = 0; j < sizeOfItemsetAtI; j++) { Integer item = maximumPeriod.getItemAtInItemsetAt(j, i); if (itemIm1 != null && item == itemIm1) { sawIm1 = true; } boolean isPrefix = maximumPeriod.isCutAtRight(i); boolean isPostfix = maximumPeriod.isPostfix(i); // END NEW PairBIDE paire = new PairBIDE(isPrefix, isPostfix, item); if (seqProcessedCount >= minsuppAbsolute) { // $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ // normal case if (addPair(mapPaires, maximumPeriod.getId(), paire, supportToMatch)) { return true; } // NEW: special cases if (sawIm1) { PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item); if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) { return true; } } if (sawI) { PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item); if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) { return true; } } // END NEW } else { // $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ // normal case addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire); // // NEW: special cases if (sawIm1) { PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item); addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2); } if (sawI) { PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item); addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2); } // END NEW } } } return false; }