/** * This method saves a sequential pattern to the output file or in memory, depending on if the * user provided an output file path or not when he launched the algorithm * * @param prefix the pattern to be saved. * @throws IOException exception if error while writing the output file. */ private void savePattern(SequentialPattern prefix) throws IOException { // increase the number of pattern found for statistics purposes patternCount++; // if the result should be saved to a file if (writer != null) { StringBuffer r = new StringBuffer(""); for (Itemset itemset : prefix.getItemsets()) { // r.append('('); for (String item : itemset.getItems()) { String string = item.toString(); r.append(string); r.append(' '); } r.append("-1 "); } // // // print the list of Pattern IDs that contains this pattern. // if(prefix.getSequencesID() != null){ // r.append("SID: "); // for(Integer id : prefix.getSequencesID()){ // r.append(id); // r.append(' '); // } // } r.append(" #SUP: "); r.append(prefix.getSequencesID().size()); writer.write(r.toString()); writer.newLine(); } // otherwise the result is kept into memory else { patterns.addSequence(prefix, prefix.size()); } }
/** * This is the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending some * prefixs that are guaranteed to not generate a closed pattern (see the BIDE+ paper for details). * * @param prefix the current prefix * @param projectedContext the projected database * @return boolean true if we should not extend the prefix */ private boolean checkBackScanPruning( SequentialPattern prefix, List<PseudoSequenceBIDE> projectedContext) { // DEBUGGIN if (prefix.size() == 1 && prefix.get(0).get(0) == 5) { System.out.println("PREFIX 5 "); } // // See the BIDE+ paper for details about this method. // For the number of item occurences that can be generated with this prefix: for (int i = 0; i < prefix.getItemOccurencesTotalCount(); i++) { Set<Integer> alreadyVisitedSID = new HashSet<Integer>(); // Create a Map of pairs to count the support of items (represented by a pair) // in the ith semi-maximum periods Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>(); // SOME CODE USED BY "findAllFrequentPairsForBackwardExtensionCheck" Integer itemI = prefix.getIthItem(i); // iPeriod Integer itemIm1 = null; // iPeriod -1 if (i > 0) { itemIm1 = prefix.getIthItem(i - 1); } // // END NEW // (1) For each i, we build the list of maximum periods // for each sequence in the original database for (PseudoSequenceBIDE pseudoSequence : projectedContext) { int sequenceID = pseudoSequence.sequence.getId(); alreadyVisitedSID.add(pseudoSequence.sequence.getId()); Position currentCutPosition = new Position(pseudoSequence.firstItemset, pseudoSequence.firstItem); PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID); PseudoSequenceBIDE period = sequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), i, currentCutPosition); if (period != null) { // // we add it to the list of maximum periods boolean hasExtension = findAllFrequentPairsForBackwardExtensionCheck( prefix.getAbsoluteSupport(), prefix, period, i, mapPaires, itemI, itemIm1, currentCutPosition); if (hasExtension) { return true; } } } } return false; }
/** * This is the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending some * prefixs that are guaranteed to not generate a closed pattern (see the BIDE+ paper for details). * * @param prefix the current prefix * @return boolean true if we should not extend the prefix */ private boolean checkBackScanPruning(SequentialPattern prefix, Set<Integer> sidset) { // // See the BIDE+ paper for details about this method. // For the number of item occurences that can be generated with this prefix: for (int i = 0; i < prefix.getItemOccurencesTotalCount(); i++) { Set<Integer> alreadyVisitedSID = new HashSet<Integer>(); // Create a Map of pairs to count the support of items (represented by a pair) // in the ith semi-maximum periods Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>(); // SOME CODE USED BY "findAllFrequentPairsForBackwardExtensionCheck" Integer itemI = prefix.getIthItem(i); // iPeriod Integer itemIm1 = null; // iPeriod -1 if (i > 0) { itemIm1 = prefix.getIthItem(i - 1); } // // END NEW int seqCount = 0; // int highestSupportUntilNow = -1; // (1) For each i, we build the list of maximum periods // for each sequence in the original database for (int sequenceID : sidset) { alreadyVisitedSID.add(sequenceID); PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID); PseudoSequenceBIDE period = sequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), i); if (period != null) { // // we add it to the list of maximum periods boolean hasExtension = findAllFrequentPairsForBackwardExtensionCheck( alreadyVisitedSID.size(), prefix, period, i, mapPaires, itemI, itemIm1); if (hasExtension) { return true; } } } } return false; }
/** * Method to check if a prefix has a backward-extension (see Bide+ article for full details). This * method do it a little bit differently than the BIDE+ article since we iterate with i on * elements of the prefix instead of iterating with a i on the itemsets of the prefix. But the * idea is the same! * * @param prefix the current prefix * @return boolean true, if there is a backward extension */ private boolean checkBackwardExtension(SequentialPattern prefix, Set<Integer> sidset) { // System.out.println("======" + prefix); int totalOccurenceCount = prefix.getItemOccurencesTotalCount(); // For the ith item of the prefix for (int i = 0; i < totalOccurenceCount; i++) { Set<Integer> alreadyVisitedSID = new HashSet<Integer>(); // // SOME CODE USED BY "findAllFrequentPairsForBackwardExtensionCheck" Integer itemI = prefix.getIthItem(i); // iPeriod Integer itemIm1 = null; // iPeriod -1 if (i > 0) { itemIm1 = prefix.getIthItem(i - 1); } // // END NEW // Create a Map of pairs to count the support of items (represented by a pair) // in the ith semi-maximum periods Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>(); // (1) For each i, we build the list of maximum periods // for each sequence in the original database // int seqCount =0; int highestSupportUntilNow = -1; // 1703 pat - 9391 ms for (int sequenceID : sidset) { // OPTIMIZATION PART 1== DON'T CHECK THE BACK EXTENSION IF THERE IS NOT ENOUGH SEQUENCE // LEFT TO FIND AN EXTENSION // THIS CAN IMPROVE THE PERFORMANCE BY UP TO 30% on FIFA int remainingSeqID = (sidset.size() - alreadyVisitedSID.size()); if (highestSupportUntilNow != -1 && highestSupportUntilNow + remainingSeqID < sidset.size()) { break; } alreadyVisitedSID.add(sequenceID); // if(!alreadyVisitedSID.contains(sequenceID)) { // seqCount++; // alreadyVisitedSID.add(sequenceID); // } // END OF OPTIMIZATION PART 1 (IT CONTINUES A FEW LINES BELOW...) PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID); PseudoSequenceBIDE period = sequence.getIthMaximumPeriodOfAPrefix(prefix.getItemsets(), i); // if the period is not null if (period != null) { boolean hasBackwardExtension = findAllFrequentPairsForBackwardExtensionCheck( alreadyVisitedSID.size(), prefix, period, i, mapPaires, itemI, itemIm1); if (hasBackwardExtension) { // System.out.println(prefix + " has a backward extension from " + i + "th // maxperiod in sequence from seq. " + sequenceID ); return true; } // ===== OPTIMIZATION PART 2 if ((sidset.size() - alreadyVisitedSID.size()) < minsuppAbsolute) { for (PairBIDE pair : mapPaires.values()) { int supportOfPair = pair.getSequenceIDs().size(); if (supportOfPair > highestSupportUntilNow) { highestSupportUntilNow = supportOfPair; // +1 because it may be raised for this sequence... } } } // ===== END OF OPTIMIZATION PART 2 } } } // totaltimeForBackwardExtension += System.currentTimeMillis() - start; return false; // no backward extension, we return false }