コード例 #1
0
ファイル: AlgoBIDEPlus.java プロジェクト: vikasmb/DataZoomer
  /**
   * Checks whether a prefix has a backward extension (see the BIDE+ article for full details).
   *
   * <p>This implementation differs slightly from the article: it iterates with {@code i} over the
   * individual items of the prefix instead of over the itemsets of the prefix, but the idea is the
   * same. For every item position {@code i} and every sequence ID in {@code sidset}, the i-th
   * maximum period of the prefix is extracted from the corresponding sequence of
   * {@code initialDatabase} and handed to {@code findAllFrequentPairsForBackwardExtensionCheck},
   * which accumulates per-pair information in a map that is reset for each position.
   *
   * @param prefix the current prefix
   * @param sidset the IDs of the sequences to examine; each ID is looked up in
   *     {@code initialDatabase} (presumably these are the sequences containing the prefix -
   *     confirm against the caller)
   * @return {@code true} as soon as {@code findAllFrequentPairsForBackwardExtensionCheck} reports
   *     a backward extension for some position and sequence, {@code false} otherwise
   */
  private boolean checkBackwardExtension(SequentialPattern prefix, Set<Integer> sidset) {
    int totalOccurenceCount = prefix.getItemOccurencesTotalCount();

    // For the ith item of the prefix
    for (int i = 0; i < totalOccurenceCount; i++) {

      // Items handed to "findAllFrequentPairsForBackwardExtensionCheck":
      // the item at position i and, when it exists, the item at position i - 1.
      Integer itemI = prefix.getIthItem(i);
      Integer itemIm1 = null;
      if (i > 0) {
        itemIm1 = prefix.getIthItem(i - 1);
      }

      // Map of pairs used to count the support of items (represented by a pair)
      // in the ith semi-maximum periods.
      Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>();

      // The size of sidset does not change while we iterate over it, so read it once.
      final int sequenceCount = sidset.size();

      // Number of sequence IDs taken from sidset so far for this position. A Set yields each
      // ID exactly once, so a plain counter is enough to know how many were visited.
      int visitedCount = 0;

      // Highest pair support observed so far; -1 means "not computed yet".
      int highestSupportUntilNow = -1;

      // (1) For each i, we build the maximum period for each sequence of the original database.
      for (int sequenceID : sidset) {
        // OPTIMIZATION PART 1: stop checking this position when there are not enough sequences
        // left to find an extension, i.e. when even if every remaining sequence raised the best
        // pair's support by one, that support would still be below the number of sequences.
        // (The original author reports up to 30% speed-up on the FIFA dataset.)
        int remainingSeqID = sequenceCount - visitedCount;
        if (highestSupportUntilNow != -1
            && highestSupportUntilNow + remainingSeqID < sequenceCount) {
          break;
        }
        visitedCount++;

        PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID);
        PseudoSequenceBIDE period = sequence.getIthMaximumPeriodOfAPrefix(prefix.getItemsets(), i);

        // Nothing to scan for this sequence when there is no period.
        if (period == null) {
          continue;
        }

        boolean hasBackwardExtension =
            findAllFrequentPairsForBackwardExtensionCheck(
                visitedCount, prefix, period, i, mapPaires, itemI, itemIm1);
        if (hasBackwardExtension) {
          return true;
        }

        // OPTIMIZATION PART 2: once fewer than minsuppAbsolute sequences remain, track the
        // highest support among the pairs seen so far; PART 1 above uses it at the start of
        // the next iteration to decide whether to give up on this position.
        if (sequenceCount - visitedCount < minsuppAbsolute) {
          for (PairBIDE pair : mapPaires.values()) {
            int supportOfPair = pair.getSequenceIDs().size();
            if (supportOfPair > highestSupportUntilNow) {
              highestSupportUntilNow = supportOfPair;
            }
          }
        }
      }
    }
    return false; // no backward extension, we return false
  }