/** * This method saves a sequential pattern to the output file or in memory, depending on if the * user provided an output file path or not when he launched the algorithm * * @param prefix the pattern to be saved. * @throws IOException exception if error while writing the output file. */ private void savePattern(SequentialPattern prefix) throws IOException { // increase the number of pattern found for statistics purposes patternCount++; // if the result should be saved to a file if (writer != null) { StringBuilder r = new StringBuilder(""); for (Itemset itemset : prefix.getItemsets()) { // r.append('('); for (Integer item : itemset.getItems()) { String string = item.toString(); r.append(string); r.append(' '); } r.append("-1 "); } // // print the list of Pattern IDs that contains this pattern. // if(prefix.getSequencesID() != null){ // r.append(" #SID: "); // for(Integer id : prefix.getSequencesID()){ // r.append(id); // r.append(' '); // } // } r.append(" #SUP: "); r.append(prefix.getSequenceIDs().size()); if (showSequenceIdentifiers) { r.append(" #SID: "); for (Integer sid : prefix.getSequenceIDs()) { r.append(sid); r.append(" "); } } writer.write(r.toString()); writer.newLine(); } // otherwise the result is kept into memory else { patterns.addSequence(prefix, prefix.size()); } }
/** * Method to recursively grow a given sequential pattern. * * @param prefix the current sequential pattern that we want to try to grow * @param database the current projected sequence database * @throws IOException exception if there is an error writing to the output file */ private int recursion(SequentialPattern prefix, List<PseudoSequenceBIDE> contexte) throws IOException { // find frequent items of size 1 in the current projected database. Set<PairBIDE> pairs = findAllFrequentPairs(prefix, contexte); // we will keep tract of the maximum support of patterns // that can be found with this prefix, to check // for forward extension when this method returns. int maxSupport = 0; // For each pair found (a pair is an item with a boolean indicating if it // appears in an itemset that is cut (a postfix) or not, and the sequence IDs // where it appears in the projected database). for (PairBIDE pair : pairs) { // if the item is frequent. if (pair.getCount() >= minsuppAbsolute) { // create the new postfix by appending this item to the prefix SequentialPattern newPrefix; // if the item is part of a postfix if (pair.isPostfix()) { // we append it to the last itemset of the prefix newPrefix = appendItemToPrefixOfSequence(prefix, pair.getItem()); // is =<is, (deltaT,i)> } else { // else, we append it as a new itemset to the sequence newPrefix = appendItemToSequence(prefix, pair.getItem()); } // build the projected database with this item // long start = System.currentTimeMillis(); List<PseudoSequenceBIDE> projectedContext = buildProjectedDatabase( pair.getItem(), contexte, pair.isPostfix(), pair.getSequenceIDs()); // debugProjectDBTime += System.currentTimeMillis() - start; // create new prefix newPrefix.setSequenceIDs(pair.getSequenceIDs()); // variable to keep track of the maximum support of extension // with this item and this prefix if (projectedContext.size() >= minsuppAbsolute) { int maxSupportOfSuccessors = 0; if (!checkBackScanPruning(newPrefix, pair.getSequenceIDs())) { maxSupportOfSuccessors = recursion(newPrefix, projectedContext); // récursion } // check the forward extension for the prefix // if no forward extension if (newPrefix.getSequenceIDs().size() != maxSupportOfSuccessors) { // if there is no backward extension if (!checkBackwardExtension(newPrefix, pair.getSequenceIDs())) { // save the pattern savePattern(newPrefix); } } } else { if (!checkBackwardExtension(newPrefix, pair.getSequenceIDs())) { // save the pattern savePattern(newPrefix); } } // record the largest support of patterns found starting // with this prefix until now if (newPrefix.getAbsoluteSupport() > maxSupport) { maxSupport = newPrefix.getAbsoluteSupport(); } } } return maxSupport; // return the maximum support generated by extension of the prefix }
/** * Method to update the support count of item in a maximum period * * @param prefix the current prefix * @param mapPaires * @param maximum periods a maximum period * @return a set of pairs indicating the support of items (note that a pair distinguish between * items in a postfix, prefix...). */ protected boolean findAllFrequentPairsForBackwardExtensionCheck( int seqProcessedCount, SequentialPattern prefix, PseudoSequenceBIDE maximumPeriod, int iPeriod, Map<PairBIDE, PairBIDE> mapPaires, Integer itemI, Integer itemIm1) { int supportToMatch = prefix.getSequenceIDs().size(); int maxPeriodSize = maximumPeriod.size(); // for each itemset in that period for (int i = 0; i < maxPeriodSize; i++) { int sizeOfItemsetAtI = maximumPeriod.getSizeOfItemsetAt(i); // NEW boolean sawI = false; // sawI after current position boolean sawIm1 = false; // sawI-1 before current position // END NEW // NEW march 20 2010 : check if I is after current position in current itemset for (int j = 0; j < sizeOfItemsetAtI; j++) { Integer item = maximumPeriod.getItemAtInItemsetAt(j, i); if (item.equals(itemI)) { sawI = true; } else if (item > itemI) { break; } } // END NEW for (int j = 0; j < sizeOfItemsetAtI; j++) { Integer item = maximumPeriod.getItemAtInItemsetAt(j, i); if (itemIm1 != null && item == itemIm1) { sawIm1 = true; } boolean isPrefix = maximumPeriod.isCutAtRight(i); boolean isPostfix = maximumPeriod.isPostfix(i); // END NEW PairBIDE paire = new PairBIDE(isPrefix, isPostfix, item); if (seqProcessedCount >= minsuppAbsolute) { // $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ // normal case if (addPair(mapPaires, maximumPeriod.getId(), paire, supportToMatch)) { return true; } // NEW: special cases if (sawIm1) { PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item); if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) { return true; } } if (sawI) { PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item); if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) { return true; } } // END NEW } else { // $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ // normal case addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire); // // NEW: special cases if (sawIm1) { PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item); addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2); } if (sawI) { PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item); addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2); } // END NEW } } } return false; }