Пример #1
0
  /**
   * Records one discovered sequential pattern and increments the pattern counter.
   *
   * <p>When {@code writer} is set, the pattern is serialized as a single line: the items of each
   * itemset separated by single spaces, each itemset terminated by {@code -1}, then {@code #SUP:}
   * followed by the number of sequence IDs attached to the pattern and, if
   * {@code showSequenceIdentifiers} is enabled, {@code #SID:} followed by those IDs. When
   * {@code writer} is {@code null}, the pattern is added to {@code patterns} instead.
   *
   * @param prefix the pattern to record
   * @throws IOException if writing the line to the output writer fails
   */
  private void savePattern(SequentialPattern prefix) throws IOException {
    // one more pattern found (kept for statistics)
    patternCount++;

    // no output writer: keep the pattern in memory and stop here
    if (writer == null) {
      patterns.addSequence(prefix, prefix.size());
      return;
    }

    StringBuilder line = new StringBuilder();

    // items of every itemset, each itemset closed by the "-1" separator
    for (Itemset itemset : prefix.getItemsets()) {
      for (Integer item : itemset.getItems()) {
        line.append(item.toString()).append(' ');
      }
      line.append("-1 ");
    }

    // number of sequence IDs attached to the pattern
    line.append(" #SUP: ").append(prefix.getSequenceIDs().size());

    // optional list of the sequence IDs themselves
    if (showSequenceIdentifiers) {
      line.append(" #SID: ");
      for (Integer sid : prefix.getSequenceIDs()) {
        line.append(sid).append(" ");
      }
    }

    writer.write(line.toString());
    writer.newLine();
  }
Пример #2
0
  /**
   * Recursively grows a sequential pattern with the items that are frequent in its projected
   * database, and saves each extension that passes the forward and backward extension checks.
   *
   * @param prefix the sequential pattern to grow
   * @param contexte the projected sequence database for {@code prefix}
   * @return the largest absolute support among the extensions of {@code prefix} built by this
   *     call, or 0 if no frequent extension was found
   * @throws IOException if a pattern cannot be written to the output file
   */
  private int recursion(SequentialPattern prefix, List<PseudoSequenceBIDE> contexte)
      throws IOException {
    // highest support among the extensions of this prefix; the caller compares it with the
    // support of the prefix to detect a forward extension
    int maxSupport = 0;

    // each pair holds an item, a flag telling whether it occurs in a cut itemset (postfix), and
    // the IDs of the sequences of the projected database in which it occurs
    for (PairBIDE pair : findAllFrequentPairs(prefix, contexte)) {
      // ignore items that are not frequent
      if (pair.getCount() < minsuppAbsolute) {
        continue;
      }

      // a postfix item joins the last itemset of the prefix; any other item starts a new itemset
      SequentialPattern newPrefix =
          pair.isPostfix()
              ? appendItemToPrefixOfSequence(prefix, pair.getItem())
              : appendItemToSequence(prefix, pair.getItem());

      // project the database on this item, then attach the supporting sequence IDs
      List<PseudoSequenceBIDE> projectedContext =
          buildProjectedDatabase(
              pair.getItem(), contexte, pair.isPostfix(), pair.getSequenceIDs());
      newPrefix.setSequenceIDs(pair.getSequenceIDs());

      // a forward extension exists when some successor has the same support as the new prefix;
      // this is only possible when the projected database is large enough
      boolean hasForwardExtension = false;
      if (projectedContext.size() >= minsuppAbsolute) {
        // successors are explored only if back-scan pruning does not discard the new prefix
        int maxSupportOfSuccessors =
            checkBackScanPruning(newPrefix, pair.getSequenceIDs())
                ? 0
                : recursion(newPrefix, projectedContext);
        hasForwardExtension = newPrefix.getSequenceIDs().size() == maxSupportOfSuccessors;
      }

      // save the pattern when it has neither a forward nor a backward extension
      if (!hasForwardExtension && !checkBackwardExtension(newPrefix, pair.getSequenceIDs())) {
        savePattern(newPrefix);
      }

      // remember the largest support found so far for this prefix
      int support = newPrefix.getAbsoluteSupport();
      if (support > maxSupport) {
        maxSupport = support;
      }
    }
    return maxSupport;
  }
Пример #3
0
  /**
   * Updates the support counts of the items found in one maximum period, as part of the
   * backward-extension check of a prefix.
   *
   * <p>Each item of each itemset of {@code maximumPeriod} is registered in {@code mapPaires} as a
   * {@code PairBIDE} built from the itemset's cut-at-right flag, its postfix flag and the item.
   * Two additional variants of the pair are registered as well: one with the postfix flag
   * inverted once {@code itemIm1} has been met in the current itemset, and one with the
   * cut-at-right flag inverted when {@code itemI} occurs in the current itemset.
   *
   * <p>While {@code seqProcessedCount} is lower than {@code minsuppAbsolute}, pairs are registered
   * with {@code addPairWithoutCheck}. Otherwise {@code addPair} is used with the support of the
   * prefix, and the method stops as soon as it reports {@code true}.
   *
   * @param seqProcessedCount value compared with {@code minsuppAbsolute} to decide whether pairs
   *     are registered with or without the support check (presumably the number of sequences
   *     processed so far; confirm against the caller)
   * @param prefix the current prefix; the size of its sequence ID collection is the support to
   *     match
   * @param maximumPeriod the maximum period whose itemsets are scanned
   * @param iPeriod not used by this method
   * @param mapPaires the map of pairs updated by this method
   * @param itemI the item looked for in each itemset to enable the inverted cut-at-right variant
   * @param itemIm1 the item that, once met in an itemset, enables the inverted postfix variant;
   *     may be {@code null}, in which case that variant is never registered
   * @return {@code true} as soon as a call to {@code addPair} returns {@code true} (presumably
   *     because a pair reached the support of the prefix; confirm against {@code addPair}),
   *     {@code false} otherwise
   */
  protected boolean findAllFrequentPairsForBackwardExtensionCheck(
      int seqProcessedCount,
      SequentialPattern prefix,
      PseudoSequenceBIDE maximumPeriod,
      int iPeriod,
      Map<PairBIDE, PairBIDE> mapPaires,
      Integer itemI,
      Integer itemIm1) {

    int supportToMatch = prefix.getSequenceIDs().size();
    int maxPeriodSize = maximumPeriod.size();

    // for each itemset in that period
    for (int i = 0; i < maxPeriodSize; i++) {
      int sizeOfItemsetAtI = maximumPeriod.getSizeOfItemsetAt(i);

      boolean sawI = false; // itemI occurs in the current itemset
      boolean sawIm1 = false; // itemIm1 was met at or before the current position

      // Look for itemI in the current itemset. The scan stops at the first item greater than
      // itemI, which assumes items of an itemset are in ascending order (TODO confirm).
      for (int j = 0; j < sizeOfItemsetAtI; j++) {
        Integer item = maximumPeriod.getItemAtInItemsetAt(j, i);
        if (item.equals(itemI)) {
          sawI = true;
        } else if (item > itemI) {
          break;
        }
      }

      for (int j = 0; j < sizeOfItemsetAtI; j++) {
        Integer item = maximumPeriod.getItemAtInItemsetAt(j, i);

        // Compare boxed Integers by value: '==' would compare object identity and can miss
        // equal items that are distinct Integer instances.
        if (itemIm1 != null && itemIm1.equals(item)) {
          sawIm1 = true;
        }

        boolean isPrefix = maximumPeriod.isCutAtRight(i);
        boolean isPostfix = maximumPeriod.isPostfix(i);

        PairBIDE paire = new PairBIDE(isPrefix, isPostfix, item);
        if (seqProcessedCount >= minsuppAbsolute) {
          // normal case, with support check
          if (addPair(mapPaires, maximumPeriod.getId(), paire, supportToMatch)) {
            return true;
          }

          // special case: same item with the postfix flag inverted
          if (sawIm1) {
            PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item);
            if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) {
              return true;
            }
          }

          // special case: same item with the cut-at-right flag inverted
          if (sawI) {
            PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item);
            if (addPair(mapPaires, maximumPeriod.getId(), paire2, supportToMatch)) {
              return true;
            }
          }
        } else {
          // normal case, without support check
          addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire);

          // special case: same item with the postfix flag inverted
          if (sawIm1) {
            PairBIDE paire2 = new PairBIDE(isPrefix, !isPostfix, item);
            addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2);
          }

          // special case: same item with the cut-at-right flag inverted
          if (sawI) {
            PairBIDE paire2 = new PairBIDE(!isPrefix, isPostfix, item);
            addPairWithoutCheck(mapPaires, maximumPeriod.getId(), paire2);
          }
        }
      }
    }
    return false;
  }