/** * This methods checks if a seq. pattern "pattern2" is strictly contained in a seq. pattern * "pattern1". * * @param pattern1 a sequential pattern * @param pattern2 another sequential pattern * @return true if the pattern1 contains pattern2. */ boolean strictlyContains(PrefixVMSP pattern1, PrefixVMSP pattern2) { // // if pattern2 is larger or equal in size, then it cannot be contained in pattern1 // if(pattern1.size() <= pattern2.size()){ // return false; // } // To see if pattern2 is strictly contained in pattern1, // we will search for each itemset i of pattern2 in pattern1 by advancing // in pattern 1 one itemset at a time. int i = 0; // position in pattern2 int j = 0; // position in pattern1 while (true) { // if the itemset at current position in pattern1 contains the itemset // at current position in pattern2 if (pattern1.get(j).containsAll(pattern2.get(i))) { // go to next itemset in pattern2 i++; // if we reached the end of pattern2, then return true if (i == pattern2.size()) { return true; } } // go to next itemset in pattern1 j++; // if we reached the end of pattern1, then pattern2 is not strictly included // in it, and return false if (j >= pattern1.size()) { return false; } // // lastly, for optimization, we check how many itemsets are left to be matched. // // if there is less itemsets left in pattern1 than in pattern2, then it will // // be impossible to get a total match, and so we return false. if ((pattern1.size() - j) < pattern2.size() - i) { return false; } } }
/** * This is the dfsPruning method as described in the SPAM paper. * * @param prefix the current prefix * @param prefixBitmap the bitmap corresponding to the current prefix * @param sn a list of items to be considered for i-steps * @param in a list of items to be considered for s-steps * @param hasToBeGreaterThanForIStep * @param m size of the current prefix in terms of items * @param lastAppendedItem the last appended item to the prefix * @throws IOException if there is an error writing a pattern to the output file * @return TRUE IF A FREQUENT PATTERN WAS CREATED USING THE PREFIX. */ boolean dfsPruning( PrefixVMSP prefix, Bitmap prefixBitmap, List<Integer> sn, List<Integer> in, int hasToBeGreaterThanForIStep, int m, Integer lastAppendedItem) throws IOException { boolean atLeastOneFrequentExtension = false; // System.out.println(prefix.toString()); // ====== S-STEPS ====== // Temporary variables (as described in the paper) List<Integer> sTemp = new ArrayList<Integer>(); List<Bitmap> sTempBitmaps = new ArrayList<Bitmap>(); // for CMAP pruning, we will only check against the last appended item Map<Integer, Integer> mapSupportItemsAfter = coocMapAfter.get(lastAppendedItem); // for each item in sn loopi: for (Integer i : sn) { // LAST POSITION PRUNING /*if (useLastPositionPruning && lastItemPositionMap.get(i) < prefixBitmap.firstItemsetID) { // System.out.println("TEST"); continue loopi; }*/ // CMAP PRUNING // we only check with the last appended item if (useCMAPPruning) { if (mapSupportItemsAfter == null) { continue loopi; } Integer support = mapSupportItemsAfter.get(i); if (support == null || support < minsup) { // System.out.println("PRUNE"); continue loopi; } } // perform the S-STEP with that item to get a new bitmap Bitmap.INTERSECTION_COUNT++; Bitmap newBitmap = prefixBitmap.createNewBitmapSStep(verticalDB.get(i), sequencesSize, lastBitIndex, maxGap); // if the support is higher than minsup if (newBitmap.getSupportWithoutGapTotal() >= minsup) { // record that item and pattern in temporary variables sTemp.add(i); sTempBitmaps.add(newBitmap); } } // for each pattern recorded for the s-step for (int k = 0; k < sTemp.size(); k++) { // STRATEGY: NEWWW atLeastOneFrequentExtension = true; int item = sTemp.get(k); // create the new prefix PrefixVMSP prefixSStep = prefix.cloneSequence(); prefixSStep.addItemset(new Itemset(item)); if (item % 2 == 0) { prefixSStep.sumOfEvenItems = item + prefix.sumOfEvenItems; prefixSStep.sumOfOddItems = prefix.sumOfOddItems; } else { prefixSStep.sumOfEvenItems = prefix.sumOfEvenItems; prefixSStep.sumOfOddItems = item + prefix.sumOfOddItems; } // prefixSStep.sumOfItems = item + prefix.sumOfItems; // create the new bitmap Bitmap newBitmap = sTempBitmaps.get(k); // save the pattern to the file if (newBitmap.getSupport() >= minsup) { boolean hasFrequentExtension = false; // recursively try to extend that pattern if (maximumPatternLength > m) { hasFrequentExtension = dfsPruning(prefixSStep, newBitmap, sTemp, sTemp, item, m + 1, item); } if (hasFrequentExtension == false) { savePatternMultipleItems(prefixSStep, newBitmap, m); } } } Map<Integer, Integer> mapSupportItemsEquals = coocMapEquals.get(lastAppendedItem); // ======== I STEPS ======= // Temporary variables List<Integer> iTemp = new ArrayList<Integer>(); List<Bitmap> iTempBitmaps = new ArrayList<Bitmap>(); // for each item in in loop2: for (Integer i : in) { // the item has to be greater than the largest item // already in the last itemset of prefix. if (i > hasToBeGreaterThanForIStep) { // LAST POSITION PRUNING /*if (useLastPositionPruning && lastItemPositionMap.get(i) < prefixBitmap.firstItemsetID) { continue loop2; }*/ // CMAP PRUNING if (useCMAPPruning) { if (mapSupportItemsEquals == null) { continue loop2; } Integer support = mapSupportItemsEquals.get(i); if (support == null || support < minsup) { continue loop2; } } // Perform an i-step with this item and the current prefix. // This creates a new bitmap Bitmap.INTERSECTION_COUNT++; Bitmap newBitmap = prefixBitmap.createNewBitmapIStep(verticalDB.get(i), sequencesSize, lastBitIndex); // If the support is no less than minsup if (newBitmap.getSupport() >= minsup) { // record that item and pattern in temporary variables iTemp.add(i); iTempBitmaps.add(newBitmap); } } } // for each pattern recorded for the i-step for (int k = 0; k < iTemp.size(); k++) { // STRATEGY: NEWWW atLeastOneFrequentExtension = true; int item = iTemp.get(k); // create the new prefix PrefixVMSP prefixIStep = prefix.cloneSequence(); prefixIStep.getItemsets().get(prefixIStep.size() - 1).addItem(item); if (item % 2 == 0) { prefixIStep.sumOfEvenItems = item + prefix.sumOfEvenItems; prefixIStep.sumOfOddItems = prefix.sumOfOddItems; } else { prefixIStep.sumOfEvenItems = prefix.sumOfEvenItems; prefixIStep.sumOfOddItems = item + prefix.sumOfOddItems; } // create the new bitmap Bitmap newBitmap = iTempBitmaps.get(k); // recursively try to extend that pattern boolean hasFrequentExtension = false; if (maximumPatternLength > m) { hasFrequentExtension = dfsPruning(prefixIStep, newBitmap, sTemp, iTemp, item, m + 1, item); } if (hasFrequentExtension == false) { // save the pattern savePatternMultipleItems(prefixIStep, newBitmap, m); } } // check the memory usage MemoryLogger.getInstance().checkMemory(); return atLeastOneFrequentExtension || useStrategyForwardExtensionChecking == false; }