示例#1
0
 /**
  * For each item, calculate the sequence id of sequences containing that item
  *
  * @param database the current sequence database
  * @return Map of items to sequence IDs that contains each item
  */
 private Map<Integer, Set<Integer>> findSequencesContainingItems(SequenceDatabase database) {
   // We use a map to store the sequence IDs where an item appear
   // Key : item   Value :  a set of sequence IDs
   Map<Integer, Set<Integer>> mapSequenceID =
       new HashMap<
           Integer,
           Set<
               Integer>>(); // pour conserver les ID des séquences: <Id Item, Set d'id de
                            // séquences>
   // for each sequence
   for (Sequence sequence : database.getSequences()) {
     // for each itemset in that sequence
     for (List<Integer> itemset : sequence.getItemsets()) {
       // for each item
       for (Integer item : itemset) {
         // get the set of sequence ids for that item
         Set<Integer> sequenceIDs = mapSequenceID.get(item);
         if (sequenceIDs == null) {
           // if null create a new set
           sequenceIDs = new HashSet<Integer>();
           mapSequenceID.put(item, sequenceIDs);
         }
         // add the current sequence id to this set
         sequenceIDs.add(sequence.getId());
       }
     }
   }
   return mapSequenceID;
 }
示例#2
0
  /**
   * This is the main method for the MaxSP algorithm.
   *
   * @param database a sequence database
   * @throws IOException exception if some error occurs while writing the output file.
   */
  private void maxSP(SequenceDatabase database, String outputFilePath) throws IOException {
    // if the user want to keep the result into memory
    if (outputFilePath == null) {
      writer = null;
      patterns = new SequentialPatterns("FREQUENT SEQUENTIAL PATTERNS");
    } else { // if the user want to save the result to a file
      patterns = null;
      writer = new BufferedWriter(new FileWriter(outputFilePath));
    }

    // The algorithm first scan the database to find all frequent items
    // The algorithm note the sequences in which these items appear.
    // This is stored in a map:  Key: item  Value : IDs of sequences containing the item
    Map<Integer, Set<Integer>> mapSequenceID = findSequencesContainingItems(database);

    // WE CONVERT THE DATABASE TO A PSEUDO-DATABASE, AND REMOVE
    // THE ITEMS OF SIZE 1 THAT ARE NOT FREQUENT, SO THAT THE ALGORITHM
    // WILL NOT CONSIDER THEM ANYMORE.

    // we create a database
    initialDatabase = new HashMap<Integer, PseudoSequenceBIDE>();
    // for each sequence of the original database
    for (Sequence sequence : database.getSequences()) {
      // we make a copy of the sequence while removing infrequent items
      Sequence optimizedSequence = sequence.cloneSequenceMinusItems(mapSequenceID, minsuppAbsolute);
      if (optimizedSequence.size() != 0) {
        // if this sequence has size >0, we add it to the new database
        initialDatabase.put(sequence.getId(), new PseudoSequenceBIDE(optimizedSequence, 0, 0));
      }
    }

    // For each frequent item
    loop1:
    for (Entry<Integer, Set<Integer>> entry : mapSequenceID.entrySet()) {
      // if the item is frequent
      if (entry.getValue().size() >= minsuppAbsolute) {

        // build the projected database with this item
        Integer item = entry.getKey();
        List<PseudoSequenceBIDE> projectedContext =
            buildProjectedContextSingleItem(item, initialDatabase, false, entry.getValue());

        // Create the prefix with this item
        SequentialPattern prefix = new SequentialPattern();
        prefix.addItemset(new Itemset(item));
        // set the sequence IDS of this prefix
        prefix.setSequenceIDs(entry.getValue());

        // variable to store the largest support of patterns
        // that will be found starting with this prefix
        if (projectedContext.size() >= minsuppAbsolute) {
          int successorSupport = 0;

          //					if(!checkBackScanPruning(prefix, entry.getValue())) {
          successorSupport = recursion(prefix, projectedContext); // récursion;
          //					}
          //					else
          //					{
          //						System.out.println("back scan pruned " + prefix);
          //					}

          // Finally, because this prefix has support > minsup
          // and passed the backscan pruning,
          // we check if it has no sucessor with support >= minsup
          // (a forward extension)
          // IF no forward extension
          if (successorSupport < minsuppAbsolute) { // ######### MODIFICATION ####
            // IF there is also no backward extension
            if (!checkBackwardExtension(prefix, entry.getValue())) {
              // the pattern is closed and we save it
              savePattern(prefix);
            }
          }
        } else {
          if (!checkBackwardExtension(prefix, entry.getValue())) {
            // the pattern is closed and we save it
            savePattern(prefix);
          }
        }
      }
    }
    // check the memory usage for statistics
    MemoryLogger.getInstance().checkMemory();
  }