/** * Method to check if a prefix has a backward-extension (see Bide+ article for full details). This * method do it a little bit differently than the BIDE+ article since we iterate with i on * elements of the prefix instead of iterating with a i on the itemsets of the prefix. But the * idea is the same! * * @param prefix the current prefix * @return boolean true, if there is a backward extension */ private boolean checkBackwardExtension(SequentialPattern prefix, Set<Integer> sidset) { // System.out.println("======" + prefix); int totalOccurenceCount = prefix.getItemOccurencesTotalCount(); // For the ith item of the prefix for (int i = 0; i < totalOccurenceCount; i++) { Set<Integer> alreadyVisitedSID = new HashSet<Integer>(); // // SOME CODE USED BY "findAllFrequentPairsForBackwardExtensionCheck" Integer itemI = prefix.getIthItem(i); // iPeriod Integer itemIm1 = null; // iPeriod -1 if (i > 0) { itemIm1 = prefix.getIthItem(i - 1); } // // END NEW // Create a Map of pairs to count the support of items (represented by a pair) // in the ith semi-maximum periods Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>(); // (1) For each i, we build the list of maximum periods // for each sequence in the original database // int seqCount =0; int highestSupportUntilNow = -1; // 1703 pat - 9391 ms for (int sequenceID : sidset) { // OPTIMIZATION PART 1== DON'T CHECK THE BACK EXTENSION IF THERE IS NOT ENOUGH SEQUENCE // LEFT TO FIND AN EXTENSION // THIS CAN IMPROVE THE PERFORMANCE BY UP TO 30% on FIFA int remainingSeqID = (sidset.size() - alreadyVisitedSID.size()); if (highestSupportUntilNow != -1 && highestSupportUntilNow + remainingSeqID < sidset.size()) { break; } alreadyVisitedSID.add(sequenceID); // if(!alreadyVisitedSID.contains(sequenceID)) { // seqCount++; // alreadyVisitedSID.add(sequenceID); // } // END OF OPTIMIZATION PART 1 (IT CONTINUES A FEW LINES BELOW...) PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID); PseudoSequenceBIDE period = sequence.getIthMaximumPeriodOfAPrefix(prefix.getItemsets(), i); // if the period is not null if (period != null) { boolean hasBackwardExtension = findAllFrequentPairsForBackwardExtensionCheck( alreadyVisitedSID.size(), prefix, period, i, mapPaires, itemI, itemIm1); if (hasBackwardExtension) { // System.out.println(prefix + " has a backward extension from " + i + "th // maxperiod in sequence from seq. " + sequenceID ); return true; } // ===== OPTIMIZATION PART 2 if ((sidset.size() - alreadyVisitedSID.size()) < minsuppAbsolute) { for (PairBIDE pair : mapPaires.values()) { int supportOfPair = pair.getSequenceIDs().size(); if (supportOfPair > highestSupportUntilNow) { highestSupportUntilNow = supportOfPair; // +1 because it may be raised for this sequence... } } } // ===== END OF OPTIMIZATION PART 2 } } } // totaltimeForBackwardExtension += System.currentTimeMillis() - start; return false; // no backward extension, we return false }