Ejemplo n.º 1
0
 /**
  * Builds a new sequence from a clone of {@code prefix} in which {@code item} has been added to
  * the last itemset. The clone is obtained through {@code cloneSequence()}; the sequence passed
  * in is not modified by this method itself.
  *
  * <p>The prefix must contain at least one itemset, because the last itemset is looked up at
  * index {@code size() - 1}.
  *
  * @param prefix the sequence to copy
  * @param item the item to add to the last itemset of the copy
  * @return the extended copy
  */
 private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, Integer item) {
   SequentialPattern extended = prefix.cloneSequence();
   // the item joins the existing last itemset rather than starting a new one
   int lastIndex = extended.size() - 1;
   extended.get(lastIndex).addItem(item);
   return extended;
 }
  /**
   * Records a sequential pattern that was found, either by writing it through {@code writer} or,
   * when no writer is set, by storing it in the in-memory {@code patterns} structure.
   *
   * <p>When written, the pattern occupies one line: every item followed by a space, every itemset
   * terminated by {@code "-1 "}, then {@code " #SUP: "} and the number of sequence IDs attached to
   * the pattern. The sequence IDs themselves are not written.
   *
   * @param prefix the pattern to be saved.
   * @throws IOException exception if error while writing the output file.
   */
  private void savePattern(SequentialPattern prefix) throws IOException {
    // one more pattern found (kept for statistics)
    patternCount++;

    // no writer: the result stays in memory, indexed by the pattern size
    if (writer == null) {
      patterns.addSequence(prefix, prefix.size());
      return;
    }

    // otherwise serialise the pattern on a single output line
    StringBuilder line = new StringBuilder();
    for (Itemset itemset : prefix.getItemsets()) {
      for (String item : itemset.getItems()) {
        line.append(item.toString()).append(' ');
      }
      // end-of-itemset marker
      line.append("-1 ");
    }
    line.append(" #SUP: ").append(prefix.getSequencesID().size());

    writer.write(line.toString());
    writer.newLine();
  }
Ejemplo n.º 3
0
  /**
   * This is the "backscan-pruning" strategy described in the BIDE+ paper to avoid extending some
   * prefixes that are guaranteed to not generate a closed pattern (see the BIDE+ paper for
   * details).
   *
   * <p>For every item occurrence index {@code i} of the prefix, the i-th semi-maximum period is
   * computed in the original (non-projected) version of each sequence of the projected database,
   * and {@code findAllFrequentPairsForBackwardExtensionCheck} is asked whether that period yields
   * an extension. The method stops at the first positive answer.
   *
   * @param prefix the current prefix
   * @param projectedContext the projected database
   * @return boolean true if we should not extend the prefix
   */
  private boolean checkBackScanPruning(
      SequentialPattern prefix, List<PseudoSequenceBIDE> projectedContext) {

    // See the BIDE+ paper for details about this method.
    // For the number of item occurrences that can be generated with this prefix:
    for (int i = 0; i < prefix.getItemOccurencesTotalCount(); i++) {

      // Map of pairs used to count the support of items (represented by a pair)
      // in the i-th semi-maximum periods; a fresh map is used for each i.
      Map<PairBIDE, PairBIDE> mapPaires = new HashMap<PairBIDE, PairBIDE>();

      // Items handed to "findAllFrequentPairsForBackwardExtensionCheck":
      // the i-th item of the prefix and the one just before it (null when i == 0).
      Integer itemI = prefix.getIthItem(i);
      Integer itemIm1 = null;
      if (i > 0) {
        itemIm1 = prefix.getIthItem(i - 1);
      }

      // (1) For each i, we build the i-th semi-maximum period
      // for each sequence, taken from the original database
      for (PseudoSequenceBIDE pseudoSequence : projectedContext) {
        int sequenceID = pseudoSequence.sequence.getId();

        // position where the projected sequence starts
        Position currentCutPosition =
            new Position(pseudoSequence.firstItemset, pseudoSequence.firstItem);

        PseudoSequenceBIDE sequence = initialDatabase.get(sequenceID);
        PseudoSequenceBIDE period =
            sequence.getIthSemiMaximumPeriodOfAPrefix(prefix.getItemsets(), i, currentCutPosition);

        // a null period contributes nothing for this sequence
        if (period == null) {
          continue;
        }

        boolean hasExtension =
            findAllFrequentPairsForBackwardExtensionCheck(
                prefix.getAbsoluteSupport(),
                prefix,
                period,
                i,
                mapPaires,
                itemI,
                itemIm1,
                currentCutPosition);
        if (hasExtension) {
          return true;
        }
      }
    }
    return false;
  }
 /**
  * Returns a clone of {@code prefix} (made with {@code cloneSequence()}) whose last itemset
  * additionally contains {@code item}. No new itemset is created.
  *
  * <p>The prefix is expected to hold at least one itemset, since the itemset at index
  * {@code size() - 1} is the one that receives the item.
  *
  * @param prefix the sequence to copy
  * @param item the item to add to the last itemset of the copy
  * @return the extended copy
  */
 private SequentialPattern appendItemToPrefixOfSequence(SequentialPattern prefix, String item) {
   SequentialPattern copy = prefix.cloneSequence();
   // extend the final itemset of the copy in place
   Itemset lastItemset = copy.get(copy.size() - 1);
   lastItemset.addItem(item);
   return copy;
 }
Ejemplo n.º 5
0
  /**
   * Method to recursively grow a given sequential pattern.
   *
   * <p>Every frequent pair found in the projected database is appended to the prefix (to its last
   * itemset when the pair is a postfix, as a new itemset otherwise). The resulting pattern is
   * explored recursively unless back-scan pruning applies, and it is saved when neither a forward
   * extension with the same support nor a backward extension is detected.
   *
   * @param prefix the current sequential pattern that we want to try to grow
   * @param contexte the current projected sequence database
   * @return the largest absolute support among the patterns built in this call by appending one
   *     frequent item to the prefix, or 0 if there is none
   * @throws IOException exception if there is an error writing to the output file
   */
  private int recursion(SequentialPattern prefix, List<PseudoSequenceBIDE> contexte)
      throws IOException {

    // find frequent items of size 1 in the current projected database.
    Set<PairBIDE> pairs = findAllFrequentPairs(prefix, contexte);

    // we will keep track of the maximum support of patterns
    // that can be found with this prefix, to check
    // for forward extension when this method returns.
    int maxSupport = 0;

    // For each pair found (a pair is an item with a boolean indicating if it
    // appears in an itemset that is cut (a postfix) or not, and the sequence IDs
    // where it appears in the projected database).
    for (PairBIDE pair : pairs) {
      // skip the item if it is not frequent.
      if (pair.getCount() < minsuppAbsolute) {
        continue;
      }

      // create the new prefix by appending this item to the prefix
      SequentialPattern newPrefix;
      if (pair.isPostfix()) {
        // the item is part of a postfix: append it to the last itemset of the prefix
        newPrefix = appendItemToPrefixOfSequence(prefix, pair.getItem());
      } else {
        // else, append it as a new itemset to the sequence
        newPrefix = appendItemToSequence(prefix, pair.getItem());
      }

      // build the projected database with this item
      List<PseudoSequenceBIDE> projectedContext =
          buildProjectedDatabase(
              pair.getItem(), contexte, pair.isPostfix(), pair.getSequenceIDs());

      // attach to the new prefix the IDs of the sequences containing it
      newPrefix.setSequenceIDs(pair.getSequenceIDs());

      if (projectedContext.size() >= minsuppAbsolute) {
        // maximum support of the extensions of this new prefix;
        // stays 0 when back-scan pruning prevents the exploration
        int maxSupportOfSuccessors = 0;

        if (!checkBackScanPruning(newPrefix, projectedContext)) {
          maxSupportOfSuccessors = recursion(newPrefix, projectedContext); // recursion
        }

        // no forward extension: no successor has the same support as the new prefix
        if (newPrefix.getSequenceIDs().size() != maxSupportOfSuccessors) {
          // if there is no backward extension either
          if (!checkBackwardExtension(newPrefix, projectedContext)) {
            // save the pattern
            savePattern(newPrefix);
          }
        }
      } else {
        // projected database too small to be explored: only the backward extension is checked
        if (!checkBackwardExtension(newPrefix, projectedContext)) {
          // save the pattern
          savePattern(newPrefix);
        }
      }

      // record the largest support of patterns found starting
      // with this prefix until now
      if (newPrefix.getAbsoluteSupport() > maxSupport) {
        maxSupport = newPrefix.getAbsoluteSupport();
      }
    }
    return maxSupport; // return the maximum support generated by extension of the prefix
  }