/**
   * Saves a sequential pattern to the output file, or keeps it in memory when the user did not
   * provide an output file path when launching the algorithm.
   *
   * @param prefix the pattern to be saved.
   * @throws IOException exception if error while writing the output file.
   */
  private void savePattern(SequentialPattern prefix) throws IOException {
    // every saved pattern is counted, whichever destination it goes to (statistics only)
    patternCount++;

    // no writer available: the result is kept in memory, registered with its size
    if (writer == null) {
      patterns.addSequence(prefix, prefix.size());
      return;
    }

    // Build one output line: the items of each itemset separated by single spaces, each itemset
    // terminated by "-1 ", then " #SUP: " and the number of sequence IDs of the pattern.
    // A StringBuilder is sufficient because the buffer never leaves this method, so the
    // synchronization performed by StringBuffer would be pure overhead.
    StringBuilder line = new StringBuilder();
    for (Itemset itemset : prefix.getItemsets()) {
      for (String item : itemset.getItems()) {
        line.append(item).append(' ');
      }
      line.append("-1 ");
    }
    // the leading space of " #SUP: " is kept so that the file format stays unchanged
    line.append(" #SUP: ").append(prefix.getSequencesID().size());

    writer.write(line.toString());
    writer.newLine();
  }
示例#2
0
  /**
   * This is the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending some
   * prefixes that are guaranteed not to generate a closed pattern (see the BIDE+ paper for details).
   *
   * @param prefix the current prefix
   * @param projectedContext the projected database
   * @return boolean true if we should not extend the prefix
   */
  private boolean checkBackScanPruning(
      SequentialPattern prefix, List<PseudoSequenceBIDE> projectedContext) {
    // See the BIDE+ paper for details about this method.
    // The number of item occurrences of the prefix is read once, before the scan starts.
    final int totalOccurenceCount = prefix.getItemOccurencesTotalCount();

    // For each item occurrence i of the prefix:
    for (int i = 0; i < totalOccurenceCount; i++) {

      // Map of pairs used to count the support of items (represented by a pair)
      // in the ith semi-maximum periods; the same map is reused for every sequence of this i
      Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>();

      // values handed to "findAllFrequentPairsForBackwardExtensionCheck"
      Integer itemI = prefix.getIthItem(i); // item at position i
      Integer itemIm1 = null; // item at position i - 1, stays null for the first item
      if (i > 0) {
        itemIm1 = prefix.getIthItem(i - 1);
      }

      // (1) For each i, we look at the ith semi-maximum period
      // of each sequence of the projected database
      for (PseudoSequenceBIDE pseudoSequence : projectedContext) {
        int sequenceID = pseudoSequence.sequence.getId();

        // position where the projected sequence starts; passed both to the period
        // computation and to the pair counting below
        Position currentCutPosition =
            new Position(pseudoSequence.firstItemset, pseudoSequence.firstItem);

        // the period is computed on the corresponding sequence of the initial database
        PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID);
        PseudoSequenceBIDE period =
            sequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), i, currentCutPosition);

        // a null period contributes nothing for this sequence
        if (period == null) {
          continue;
        }

        boolean hasExtension =
            findAllFrequentPairsForBackwardExtensionCheck(
                prefix.getAbsoluteSupport(),
                prefix,
                period,
                i,
                mapPaires,
                itemI,
                itemIm1,
                currentCutPosition);
        if (hasExtension) {
          // the prefix should not be extended
          return true;
        }
      }
    }
    return false;
  }
示例#3
0
  /**
   * This is the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending some
   * prefixes that are guaranteed not to generate a closed pattern (see the BIDE+ paper for details).
   *
   * @param prefix the current prefix
   * @param sidset the IDs of the sequences to check; each ID is looked up in the initial database
   * @return boolean true if we should not extend the prefix
   */
  private boolean checkBackScanPruning(SequentialPattern prefix, Set<Integer> sidset) {
    // Walk over every item occurrence of the prefix (the BIDE+ paper has the details).
    for (int position = 0; position < prefix.getItemOccurencesTotalCount(); position++) {

      // sequence IDs handled so far for this position; its size is passed to the pair counting
      Set<Integer> seenSequenceIds = new HashSet<Integer>();

      // map of pairs used to count the support of items in the semi-maximum periods
      // of this position
      Map<PairBIDE, PairBIDE> pairSupports = new HashMap<PairBIDE, PairBIDE>();

      // arguments expected by "findAllFrequentPairsForBackwardExtensionCheck":
      // the item at this position and the one just before it (null for the first item)
      Integer currentItem = prefix.getIthItem(position);
      Integer previousItem = (position > 0) ? prefix.getIthItem(position - 1) : null;

      // examine the semi-maximum period of every listed sequence of the initial database
      for (int sid : sidset) {
        seenSequenceIds.add(sid);

        PseudoSequenceBIDE semiMaximumPeriod =
            initialDatabase
                .get(sid)
                .getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), position);

        // nothing to count when the sequence has no such period
        if (semiMaximumPeriod == null) {
          continue;
        }

        if (findAllFrequentPairsForBackwardExtensionCheck(
            seenSequenceIds.size(),
            prefix,
            semiMaximumPeriod,
            position,
            pairSupports,
            currentItem,
            previousItem)) {
          return true;
        }
      }
    }
    return false;
  }
示例#4
0
  /**
   * Checks whether a prefix has a backward extension (see the BIDE+ article for full details).
   * This method does it a little differently than the BIDE+ article, since it iterates with i
   * over the items of the prefix instead of over the itemsets of the prefix. The idea is the same.
   *
   * @param prefix the current prefix
   * @param sidset the IDs of the sequences to check; each ID is looked up in the initial database
   * @return boolean true, if there is a backward extension
   */
  private boolean checkBackwardExtension(SequentialPattern prefix, Set<Integer> sidset) {
    final int occurrenceCount = prefix.getItemOccurencesTotalCount();

    // one pass per item occurrence of the prefix
    for (int position = 0; position < occurrenceCount; position++) {

      // sequence IDs already processed for this position
      Set<Integer> visitedIds = new HashSet<Integer>();

      // arguments expected by "findAllFrequentPairsForBackwardExtensionCheck":
      // the item at this position and the one just before it (null for the first item)
      Integer currentItem = prefix.getIthItem(position);
      Integer previousItem = null;
      if (position > 0) {
        previousItem = prefix.getIthItem(position - 1);
      }

      // map of pairs used to count the support of items in the maximum periods of this position
      Map<PairBIDE, PairBIDE> pairSupports = new HashMap<PairBIDE, PairBIDE>();

      // largest pair support observed so far; -1 until the second optimization has computed it
      int bestSupport = -1;

      for (int sid : sidset) {
        // First optimization: stop scanning when, even if every sequence not yet visited
        // contributed, the best pair support could no longer reach the size of sidset.
        int unvisited = sidset.size() - visitedIds.size();
        if (bestSupport != -1 && bestSupport + unvisited < sidset.size()) {
          break;
        }

        visitedIds.add(sid);

        PseudoSequenceBIDE maximumPeriod =
            initialDatabase.get(sid).getIthMaximumPeriodOfAPrefix(prefix.getItemsets(), position);

        // nothing to count when the sequence has no such period
        if (maximumPeriod == null) {
          continue;
        }

        if (findAllFrequentPairsForBackwardExtensionCheck(
            visitedIds.size(),
            prefix,
            maximumPeriod,
            position,
            pairSupports,
            currentItem,
            previousItem)) {
          return true;
        }

        // Second optimization: once fewer than minsuppAbsolute sequences remain, refresh the
        // best pair support so that the check at the top of the loop can end the scan early.
        if ((sidset.size() - visitedIds.size()) < minsuppAbsolute) {
          for (PairBIDE pair : pairSupports.values()) {
            bestSupport = Math.max(bestSupport, pair.getSequenceIDs().size());
          }
        }
      }
    }
    // no backward extension was found for any position
    return false;
  }