コード例 #1
0
ファイル: AlgoBIDEPlus.java プロジェクト: chengsq/spmf
  /**
   * Applies the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending
   * prefixes that are guaranteed not to generate a closed pattern (see the BIDE+ paper for
   * details).
   *
   * <p>For each item occurrence {@code i} of the prefix, the i-th semi-maximum period of every
   * sequence of the projected database is computed from the corresponding sequence of {@code
   * initialDatabase}, and the items of that period are accumulated through {@code
   * findAllFrequentPairsForBackwardExtensionCheck}. The scan stops as soon as that helper reports
   * an extension.
   *
   * @param prefix the current prefix
   * @param projectedContext the projected database
   * @return boolean true if we should not extend the prefix
   */
  private boolean checkBackScanPruning(
      SequentialPattern prefix, List<PseudoSequenceBIDE> projectedContext) {

    // The number of item occurrences is read once, as checkBackwardExtension does.
    final int totalOccurenceCount = prefix.getItemOccurencesTotalCount();

    // For each item occurrence that can be generated with this prefix:
    for (int i = 0; i < totalOccurenceCount; i++) {

      // Map of pairs used to count the support of items (represented by a pair)
      // in the i-th semi-maximum periods. A fresh map is used for every i.
      Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>();

      // Items handed to findAllFrequentPairsForBackwardExtensionCheck:
      // the i-th item of the prefix and, when it exists, the (i-1)-th one.
      Integer itemI = prefix.getIthItem(i);
      Integer itemIm1 = null;
      if (i > 0) {
        itemIm1 = prefix.getIthItem(i - 1);
      }

      // For each pseudo-sequence of the projected database, build the i-th
      // semi-maximum period from the matching sequence of the initial database.
      for (PseudoSequenceBIDE pseudoSequence : projectedContext) {
        int sequenceID = pseudoSequence.sequence.getId();

        // Position at which the pseudo-sequence starts in its underlying sequence.
        Position currentCutPosition =
            new Position(pseudoSequence.firstItemset, pseudoSequence.firstItem);

        PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID);
        PseudoSequenceBIDE period =
            sequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), i, currentCutPosition);

        // No period for this sequence: nothing to count.
        if (period == null) {
          continue;
        }

        boolean hasExtension =
            findAllFrequentPairsForBackwardExtensionCheck(
                prefix.getAbsoluteSupport(),
                prefix,
                period,
                i,
                mapPaires,
                itemI,
                itemIm1,
                currentCutPosition);
        if (hasExtension) {
          return true;
        }
      }
    }
    return false;
  }
コード例 #2
0
ファイル: AlgoBIDEPlus.java プロジェクト: vikasmb/DataZoomer
  /**
   * Implements the "backscan-pruning" test of the BIDE+ paper: a prefix for which this method
   * returns true does not need to be extended (see the BIDE+ paper for details).
   *
   * <p>For each item occurrence of the prefix, the matching semi-maximum period is computed in
   * every sequence listed in {@code sidset}, and the items of those periods are accumulated by
   * {@code findAllFrequentPairsForBackwardExtensionCheck}.
   *
   * @param prefix the current prefix
   * @param sidset identifiers of the sequences to scan; each one is looked up in {@code
   *     initialDatabase}
   * @return boolean true if we should not extend the prefix
   */
  private boolean checkBackScanPruning(SequentialPattern prefix, Set<Integer> sidset) {
    for (int occurrence = 0; occurrence < prefix.getItemOccurencesTotalCount(); occurrence++) {

      // Sequence ids handled so far for this occurrence; its size is passed to the helper.
      Set<Integer> visitedSids = new HashSet<Integer>();

      // Support counters of the items (as pairs) seen in the semi-maximum periods.
      Map<PairBIDE, PairBIDE> pairCounts = new HashMap<PairBIDE, PairBIDE>();

      // Current item of the prefix and, when there is one, the item just before it.
      Integer currentItem = prefix.getIthItem(occurrence);
      Integer previousItem = (occurrence > 0) ? prefix.getIthItem(occurrence - 1) : null;

      for (int sid : sidset) {
        visitedSids.add(sid);

        PseudoSequenceBIDE fullSequence = initialDatabase.get(sid);
        PseudoSequenceBIDE semiMaxPeriod =
            fullSequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), occurrence);

        // Nothing to count when the sequence has no such period.
        if (semiMaxPeriod == null) {
          continue;
        }

        if (findAllFrequentPairsForBackwardExtensionCheck(
            visitedSids.size(),
            prefix,
            semiMaxPeriod,
            occurrence,
            pairCounts,
            currentItem,
            previousItem)) {
          return true;
        }
      }
    }
    return false;
  }
コード例 #3
0
ファイル: AlgoBIDEPlus.java プロジェクト: vikasmb/DataZoomer
  /**
   * Builds a projected database by pseudo-projection.
   *
   * <p>Only sequences whose id is in {@code sidset} are considered. Every itemset that contains
   * {@code item} and whose postfix status equals {@code inSuffix} yields at most one new
   * pseudo-sequence, starting right after the matched item.
   *
   * @param item The item to use to make the pseudo-projection
   * @param database The current database.
   * @param inSuffix This boolean indicates if the item "item" is part of a suffix or not.
   * @param sidset The ids of the sequences that may be projected.
   * @return the projected database.
   */
  private List<PseudoSequenceBIDE> buildProjectedDatabase(
      Integer item, List<PseudoSequenceBIDE> database, boolean inSuffix, Set<Integer> sidset) {
    List<PseudoSequenceBIDE> projected = new ArrayList<PseudoSequenceBIDE>();

    for (PseudoSequenceBIDE candidate : database) {
      // Skip the sequences that are not part of the requested id set.
      if (!sidset.contains(candidate.getId())) {
        continue;
      }

      for (int itemsetPos = 0; itemsetPos < candidate.size(); itemsetPos++) {
        int itemsetSize = candidate.getSizeOfItemsetAt(itemsetPos);
        int itemPos = candidate.indexOf(itemsetSize, itemsetPos, item);

        // The itemset must contain the item, and its postfix status must match inSuffix.
        if (itemPos == -1 || candidate.isPostfix(itemsetPos) != inSuffix) {
          continue;
        }

        if (itemPos != itemsetSize - 1) {
          // Not the last item of the itemset: the projection starts inside this itemset.
          projected.add(new PseudoSequenceBIDE(candidate, itemsetPos, itemPos + 1));
        } else if (itemsetPos != candidate.size() - 1) {
          // Last item of the itemset but not the last itemset: start at the next itemset.
          projected.add(new PseudoSequenceBIDE(candidate, itemsetPos + 1, 0));
        }
      }
    }
    return projected;
  }
コード例 #4
0
ファイル: AlgoBIDEPlus.java プロジェクト: vikasmb/DataZoomer
  /**
   * Collects the pairs describing every item occurrence of a projected sequence database.
   *
   * <p>Each item of each itemset is turned into a {@code PairBIDE} carrying the itemset's
   * cut-at-right and postfix flags, and the id of the sequence is registered for that pair through
   * {@code addPairWithoutCheck}. Memory usage is recorded once the scan is finished.
   *
   * @param prefix the current prefix (not read by this method)
   * @param sequences the set of sequences
   * @return the keys of the map filled during the scan, i.e. one pair per distinct (item, flags)
   *     combination encountered
   */
  protected Set<PairBIDE> findAllFrequentPairs(
      SequentialPattern prefix, List<PseudoSequenceBIDE> sequences) {
    Map<PairBIDE, PairBIDE> pairIndex = new HashMap<PairBIDE, PairBIDE>();

    for (PseudoSequenceBIDE current : sequences) {
      for (int itemsetPos = 0; itemsetPos < current.size(); itemsetPos++) {
        for (int itemPos = 0; itemPos < current.getSizeOfItemsetAt(itemsetPos); itemPos++) {
          Integer value = current.getItemAtInItemsetAt(itemPos, itemsetPos);

          // Flags of the itemset holding the item.
          boolean cutAtRight = current.isCutAtRight(itemsetPos);
          boolean postfix = current.isPostfix(itemsetPos);

          // Record that this sequence contains the pair.
          PairBIDE candidate = new PairBIDE(cutAtRight, postfix, value);
          addPairWithoutCheck(pairIndex, current.getId(), candidate);
        }
      }
    }

    MemoryLogger.getInstance().checkMemory();
    return pairIndex.keySet();
  }
コード例 #5
0
ファイル: AlgoMaxSP.java プロジェクト: SigmaX/SPMFfork
  /**
   * Builds a projected database by pseudo-projection on a single item.
   *
   * <p>Each id of {@code sidset} is looked up in {@code initialDatabase2}. Every itemset of the
   * resulting sequence that contains {@code item} and whose postfix status equals {@code inSuffix}
   * yields at most one new pseudo-sequence, starting right after the matched item.
   *
   * @param item The item to use to make the pseudo-projection
   * @param initialDatabase2 The sequences of the database, indexed by sequence id.
   * @param inSuffix This boolean indicates if the item "item" is part of a suffix or not.
   * @param sidset The ids of the sequences to project.
   * @return the projected database.
   */
  private List<PseudoSequenceBIDE> buildProjectedContextSingleItem(
      Integer item,
      Map<Integer, PseudoSequenceBIDE> initialDatabase2,
      boolean inSuffix,
      Set<Integer> sidset) {
    List<PseudoSequenceBIDE> projected = new ArrayList<PseudoSequenceBIDE>();

    for (int sid : sidset) {
      PseudoSequenceBIDE source = initialDatabase2.get(sid);

      for (int itemsetPos = 0; itemsetPos < source.size(); itemsetPos++) {
        int itemsetSize = source.getSizeOfItemsetAt(itemsetPos);
        int itemPos = source.indexOf(itemsetSize, itemsetPos, item);

        // The itemset must contain the item, and its postfix status must match inSuffix.
        if (itemPos == -1 || source.isPostfix(itemsetPos) != inSuffix) {
          continue;
        }

        if (itemPos != itemsetSize - 1) {
          // Not the last item of the itemset: the projection starts inside this itemset.
          projected.add(new PseudoSequenceBIDE(source, itemsetPos, itemPos + 1));
        } else if (itemsetPos != source.size() - 1) {
          // Last item of the itemset but not the last itemset: start at the next itemset.
          projected.add(new PseudoSequenceBIDE(source, itemsetPos + 1, 0));
        }
      }
    }
    return projected;
  }
コード例 #6
0
ファイル: AlgoBIDEPlus.java プロジェクト: vikasmb/DataZoomer
  /**
   * Updates the support counts of the items found in one maximum period and reports whether one
   * of the {@code addPair} calls returned true.
   *
   * <p>For every item of every itemset of {@code maximumPeriod}, a {@code PairBIDE} is built from
   * the itemset's cut-at-right and postfix flags and registered in {@code mapPaires} under the
   * period's sequence id. Two extra variants are registered as well: one with the postfix flag
   * inverted once {@code itemIm1} has been met in the current itemset, and one with the
   * cut-at-right flag inverted when {@code itemI} is found in the current itemset.
   *
   * @param seqProcessedCount number of sequences processed so far by the caller; while it is below
   *     {@code minsuppAbsolute} the pairs are only recorded, without any check
   * @param prefix the current prefix; the size of its sequence-id set is the support handed to
   *     {@code addPair}
   * @param maximumPeriod the period whose items are counted
   * @param iPeriod index of the period (not read by this method)
   * @param mapPaires map in which the pairs are accumulated across calls
   * @param itemI the i-th item of the prefix
   * @param itemIm1 the (i-1)-th item of the prefix, or {@code null} when there is none
   * @return {@code true} as soon as a call to {@code addPair} returns {@code true}, {@code false}
   *     otherwise
   */
  protected boolean findAllFrequentPairsForBackwardExtensionCheck(
      int seqProcessedCount,
      SequentialPattern prefix,
      PseudoSequenceBIDE maximumPeriod,
      int iPeriod,
      Map<PairBIDE, PairBIDE> mapPaires,
      Integer itemI,
      Integer itemIm1) {

    int supportToMatch = prefix.getSequenceIDs().size();
    int maxPeriodSize = maximumPeriod.size();

    // for each itemset in that period
    for (int i = 0; i < maxPeriodSize; i++) {
      int sizeOfItemsetAtI = maximumPeriod.getSizeOfItemsetAt(i);

      boolean sawI = false; // itemI is present in the current itemset
      boolean sawIm1 = false; // itemIm1 was met at or before the current item of this itemset

      // First pass: look for itemI in the current itemset. The scan stops at the first item
      // greater than itemI (presumably items of an itemset are sorted ascending -- confirm).
      for (int j = 0; j < sizeOfItemsetAtI; j++) {
        Integer item = maximumPeriod.getItemAtInItemsetAt(j, i);
        if (item.equals(itemI)) {
          sawI = true;
        } else if (item > itemI) {
          break;
        }
      }

      // Second pass: register every item of the itemset.
      for (int j = 0; j < sizeOfItemsetAtI; j++) {
        Integer item = maximumPeriod.getItemAtInItemsetAt(j, i);

        // Value comparison: both operands are boxed Integers, so "==" would compare
        // references and miss equal values that are not the same cached instance.
        if (itemIm1 != null && item.equals(itemIm1)) {
          sawIm1 = true;
        }

        boolean isPrefix = maximumPeriod.isCutAtRight(i);
        boolean isPostfix = maximumPeriod.isPostfix(i);

        PairBIDE paire = new PairBIDE(isPrefix, isPostfix, item);
        if (seqProcessedCount >= minsuppAbsolute) {
          // Enough sequences have been processed: use the checking variant of the
          // registration and stop as soon as it reports true.

          // normal case
          if (addPair(mapPaires, maximumPeriod.getId(), paire, supportToMatch)) {
            return true;
          }

          // special case: itemIm1 already met in this itemset -> postfix flag inverted
          if (sawIm1) {
            PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item);
            if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) {
              return true;
            }
          }

          // special case: itemI present in this itemset -> cut-at-right flag inverted
          if (sawI) {
            PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item);
            if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) {
              return true;
            }
          }
        } else {
          // Fewer than minsuppAbsolute sequences processed: only record the pairs.

          // normal case
          addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire);

          // special case: itemIm1 already met in this itemset -> postfix flag inverted
          if (sawIm1) {
            PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item);
            addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2);
          }

          // special case: itemI present in this itemset -> cut-at-right flag inverted
          if (sawI) {
            PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item);
            addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2);
          }
        }
      }
    }
    return false;
  }
コード例 #7
0
ファイル: AlgoBIDEPlus.java プロジェクト: vikasmb/DataZoomer
  /**
   * Method to check if a prefix has a backward-extension (see Bide+ article for full details). This
   * method does it a little bit differently than the BIDE+ article since we iterate with i on
   * elements of the prefix instead of iterating with i on the itemsets of the prefix. But the
   * idea is the same!
   *
   * <p>For each item occurrence i of the prefix, the i-th maximum period of every sequence listed
   * in {@code sidset} is computed from {@code initialDatabase}, and its items are accumulated by
   * {@code findAllFrequentPairsForBackwardExtensionCheck}. The scan of the sequences is abandoned
   * early when the best support recorded so far can no longer reach {@code sidset.size()}.
   *
   * @param prefix the current prefix
   * @param sidset identifiers of the sequences to scan; each one is looked up in {@code
   *     initialDatabase}
   * @return boolean true, if there is a backward extension
   */
  private boolean checkBackwardExtension(SequentialPattern prefix, Set<Integer> sidset) {

    int totalOccurenceCount = prefix.getItemOccurencesTotalCount();
    // For the ith item of the prefix
    for (int i = 0; i < totalOccurenceCount; i++) {

      // Sequence ids already processed for this value of i.
      Set<Integer> alreadyVisitedSID = new HashSet<Integer>();

      // Items handed to "findAllFrequentPairsForBackwardExtensionCheck"
      Integer itemI = prefix.getIthItem(i); // i-th item of the prefix
      Integer itemIm1 = null; // (i-1)-th item of the prefix, null when i == 0
      if (i > 0) {
        itemIm1 = prefix.getIthItem(i - 1);
      }

      // Create a Map of pairs to count the support of items (represented by a pair)
      // in the ith maximum periods
      Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>();

      // (1) For each i, we build the list of maximum periods
      // for each sequence in the original database.
      // Highest pair support observed so far; -1 until OPTIMIZATION PART 2 has set it.
      int highestSupportUntilNow = -1;

      for (int sequenceID : sidset) {
        // OPTIMIZATION PART 1: do not check the backward extension if there are not enough
        // sequences left to find an extension: even if every remaining sequence contributed,
        // the highest support recorded so far could not reach sidset.size().
        // (Original author's note: this can improve performance by up to 30% on FIFA.)
        int remainingSeqID = (sidset.size() - alreadyVisitedSID.size());
        if (highestSupportUntilNow != -1
            && highestSupportUntilNow + remainingSeqID < sidset.size()) {
          break;
        }

        alreadyVisitedSID.add(sequenceID);
        // END OF OPTIMIZATION PART 1 (it continues a few lines below...)

        PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID);

        PseudoSequenceBIDE period = sequence.getIthMaximumPeriodOfAPrefix(prefix.getItemsets(), i);

        // if the period is not null
        if (period != null) {

          // Count the items of this period; the number of sequences visited so far is passed
          // as the "processed" count.
          boolean hasBackwardExtension =
              findAllFrequentPairsForBackwardExtensionCheck(
                  alreadyVisitedSID.size(), prefix, period, i, mapPaires, itemI, itemIm1);

          if (hasBackwardExtension) {
            return true;
          }
          // ===== OPTIMIZATION PART 2
          // Once fewer than minsuppAbsolute sequences remain, track the largest support among
          // the pairs recorded so far; it is used by the early-exit test of PART 1.
          if ((sidset.size() - alreadyVisitedSID.size()) < minsuppAbsolute) {
            for (PairBIDE pair : mapPaires.values()) {
              int supportOfPair = pair.getSequenceIDs().size();

              if (supportOfPair > highestSupportUntilNow) {
                highestSupportUntilNow =
                    supportOfPair; // NOTE(review): the original comment mentioned "+1", but no increment is applied here
              }
            }
          }
          // ===== END OF OPTIMIZATION PART 2
        }
      }
    }
    return false; // no backward extension, we return false
  }