/** * This method creates a copy of the sequence and add a given item to the last itemset of the * sequence. It sets the support of the sequence as the support of the item. * * @param prefix the sequence * @param item the item * @return the new sequence */ private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, Integer item) { SequentialPattern newPrefix = prefix.cloneSequence(); // add to the last itemset Itemset itemset = newPrefix.get(newPrefix.size() - 1); itemset.addItem(item); return newPrefix; }
/** * This method saves a sequential pattern to the output file or in memory, depending on if the * user provided an output file path or not when he launched the algorithm * * @param prefix the pattern to be saved. * @throws IOException exception if error while writing the output file. */ private void savePattern(SequentialPattern prefix) throws IOException { // increase the number of pattern found for statistics purposes patternCount++; // if the result should be saved to a file if (writer != null) { StringBuffer r = new StringBuffer(""); for (Itemset itemset : prefix.getItemsets()) { // r.append('('); for (String item : itemset.getItems()) { String string = item.toString(); r.append(string); r.append(' '); } r.append("-1 "); } // // // print the list of Pattern IDs that contains this pattern. // if(prefix.getSequencesID() != null){ // r.append("SID: "); // for(Integer id : prefix.getSequencesID()){ // r.append(id); // r.append(' '); // } // } r.append(" #SUP: "); r.append(prefix.getSequencesID().size()); writer.write(r.toString()); writer.newLine(); } // otherwise the result is kept into memory else { patterns.addSequence(prefix, prefix.size()); } }
/** * This is the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending some * prefixs that are guaranteed to not generate a closed pattern (see the BIDE+ paper for details). * * @param prefix the current prefix * @param projectedContext the projected database * @return boolean true if we should not extend the prefix */ private boolean checkBackScanPruning( SequentialPattern prefix, List<PseudoSequenceBIDE> projectedContext) { // DEBUGGIN if (prefix.size() == 1 && prefix.get(0).get(0) == 5) { System.out.println("PREFIX 5 "); } // // See the BIDE+ paper for details about this method. // For the number of item occurences that can be generated with this prefix: for (int i = 0; i < prefix.getItemOccurencesTotalCount(); i++) { Set<Integer> alreadyVisitedSID = new HashSet<Integer>(); // Create a Map of pairs to count the support of items (represented by a pair) // in the ith semi-maximum periods Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>(); // SOME CODE USED BY "findAllFrequentPairsForBackwardExtensionCheck" Integer itemI = prefix.getIthItem(i); // iPeriod Integer itemIm1 = null; // iPeriod -1 if (i > 0) { itemIm1 = prefix.getIthItem(i - 1); } // // END NEW // (1) For each i, we build the list of maximum periods // for each sequence in the original database for (PseudoSequenceBIDE pseudoSequence : projectedContext) { int sequenceID = pseudoSequence.sequence.getId(); alreadyVisitedSID.add(pseudoSequence.sequence.getId()); Position currentCutPosition = new Position(pseudoSequence.firstItemset, pseudoSequence.firstItem); PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID); PseudoSequenceBIDE period = sequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), i, currentCutPosition); if (period != null) { // // we add it to the list of maximum periods boolean hasExtension = findAllFrequentPairsForBackwardExtensionCheck( prefix.getAbsoluteSupport(), prefix, period, i, mapPaires, itemI, itemIm1, currentCutPosition); 
if (hasExtension) { return true; } } } } return false; }
/**
 * Returns a clone of {@code prefix} whose last itemset additionally contains {@code item}.
 *
 * <p>This method itself sets no support information on the result. NOTE(review): whether
 * {@code prefix} stays unmodified depends on {@code cloneSequence()} copying the itemsets too;
 * confirm against its implementation.
 *
 * @param prefix the pattern to extend; its clone is indexed at {@code size() - 1}, so at least
 *     one itemset is expected
 * @param item the item to add to the last itemset of the clone
 * @return the clone of {@code prefix} after {@code item} was added to its last itemset
 */
private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, String item) {
  SequentialPattern copy = prefix.cloneSequence();
  // extend the final itemset of the copy in place
  Itemset tail = copy.get(copy.size() - 1);
  tail.addItem(item);
  return copy;
}
/** * Method to recursively grow a given sequential pattern. * * @param prefix the current sequential pattern that we want to try to grow * @param database the current projected sequence database * @throws IOException exception if there is an error writing to the output file */ private int recursion(SequentialPattern prefix, List<PseudoSequenceBIDE> contexte) throws IOException { if (prefix.size() == 1 && prefix.get(0).get(0) == 5) { System.out.println("PREFIXBY5 : " + prefix); } // find frequent items of size 1 in the current projected database. Set<PairBIDE> pairs = findAllFrequentPairs(prefix, contexte); // DEBUGING System.out.println("PREFIX: " + prefix); if (prefix.size() == 2 && prefix.get(0).size() == 3 && prefix.get(1).size() == 3 && prefix.get(0).get(0) == 1 && prefix.get(0).get(1) == 3 && prefix.get(0).get(2) == 5 && prefix.get(1).get(0) == 2 && prefix.get(1).get(1) == 3 && prefix.get(1).get(2) == 5) { System.out.println("DEBUGGING !!!!!!!! #### (1 3 5 )(2 3 5 )"); } if (prefix.size() == 2 && prefix.get(0).size() == 3 && prefix.get(1).size() == 2 && prefix.get(0).get(0) == 1 && prefix.get(0).get(1) == 3 && prefix.get(0).get(2) == 5 && prefix.get(1).get(0) == 2 && prefix.get(1).get(1) == 3) { System.out.println("DEBUGGING !!!!!!!! #### (1 3 5 )(2 3 )"); } // we will keep tract of the maximum support of patterns // that can be found with this prefix, to check // for forward extension when this method returns. int maxSupport = 0; // For each pair found (a pair is an item with a boolean indicating if it // appears in an itemset that is cut (a postfix) or not, and the sequence IDs // where it appears in the projected database). for (PairBIDE pair : pairs) { // if the item is frequent. 
if (pair.getCount() >= minsuppAbsolute) { // create the new postfix by appending this item to the prefix SequentialPattern newPrefix; // if the item is part of a postfix if (pair.isPostfix()) { // we append it to the last itemset of the prefix newPrefix = appendItemToPrefixOfSequence(prefix, pair.getItem()); // is =<is, (deltaT,i)> } else { // else, we append it as a new itemset to the sequence newPrefix = appendItemToSequence(prefix, pair.getItem()); } // build the projected database with this item // long start = System.currentTimeMillis(); List<PseudoSequenceBIDE> projectedContext = buildProjectedDatabase( pair.getItem(), contexte, pair.isPostfix(), pair.getSequenceIDs()); // debugProjectDBTime += System.currentTimeMillis() - start; // create new prefix newPrefix.setSequenceIDs(pair.getSequenceIDs()); // variable to keep track of the maximum support of extension // with this item and this prefix if (projectedContext.size() >= minsuppAbsolute) { int maxSupportOfSuccessors = 0; if (!checkBackScanPruning(newPrefix, projectedContext)) { maxSupportOfSuccessors = recursion(newPrefix, projectedContext); // r�cursion } // check the forward extension for the prefix // if no forward extension if (newPrefix.getSequenceIDs().size() != maxSupportOfSuccessors) { // if there is no backward extension if (!checkBackwardExtension(newPrefix, projectedContext)) { // save the pattern savePattern(newPrefix); } } } else { if (!checkBackwardExtension(newPrefix, projectedContext)) { // save the pattern savePattern(newPrefix); } } // record the largest support of patterns found starting // with this prefix until now if (newPrefix.getAbsoluteSupport() > maxSupport) { maxSupport = newPrefix.getAbsoluteSupport(); } } } return maxSupport; // return the maximum support generated by extension of the prefix }