/**
   * This is the main method for the PrefixSpan algorithm that is called to start the algorithm
   *
   * @param outputFilePath an output file path if the result should be saved to a file or null if
   *     the result should be saved to memory.
   * @param database a sequence database
   * @throws IOException exception if an error while writing the output file
   */
  private void prefixSpan(SequenceDatabase database, String outputFilePath) throws IOException {
    // if the user want to keep the result into memory
    if (outputFilePath == null) {
      writer = null;
      patterns = new SequentialPatterns("FREQUENT SEQUENTIAL PATTERNS");
    } else { // if the user want to save the result to a file
      patterns = null;
      writer = new BufferedWriter(new FileWriter(outputFilePath));
    }

    // We have to scan the database to find all frequent patterns of size 1.
    // We note the sequences in which these patterns appear.
    Map<String, Set<Integer>> mapSequenceID = findSequencesContainingItems(database);

    // WE CONVERT THE DATABASE ITON A PSEUDO-DATABASE, AND REMOVE
    // THE ITEMS OF SIZE 1 THAT ARE NOT FREQUENT, SO THAT THE ALGORITHM
    // WILL NOT CONSIDER THEM ANYMORE. (OPTIMIZATION : OCTOBER-08 )

    // Create a list of pseudosequence
    List<PseudoSequence> initialContext = new ArrayList<PseudoSequence>();
    // for each sequence in  the database
    for (Sequence sequence : database.getSequences()) {
      // remove infrequent items
      Sequence optimizedSequence = sequence.cloneSequenceMinusItems(mapSequenceID, minsuppAbsolute);
      if (optimizedSequence.size() != 0) {
        // if the size is > 0, create a pseudo sequence with this sequence
        initialContext.add(new PseudoSequence(optimizedSequence, 0, 0));
      }
    }

    // For each item
    for (Entry<String, Set<Integer>> entry : mapSequenceID.entrySet()) {
      // if the item is frequent  (has a support >= minsup)
      if (entry.getValue().size() >= minsuppAbsolute) { // if the item is frequent
        // build the projected context
        String item = entry.getKey();
        List<PseudoSequence> projectedContext = buildProjectedContext(item, initialContext, false);

        // Create the prefix for the projected context.
        SequentialPattern prefix = new SequentialPattern(0);
        prefix.addItemset(new Itemset(item));
        prefix.setSequencesID(entry.getValue());

        // The prefix is a frequent sequential pattern.
        // We save it in the result.
        savePattern(prefix); // we found a sequence.

        // Recursive call !
        recursion(prefix, projectedContext);
      }
    }
  }
Example #2
0
 /**
  * This method creates a copy of the sequence and add a given item as a new itemset to the
  * sequence. It sets the support of the sequence as the support of the item.
  *
  * @param prefix the sequence
  * @param item the item
  * @return the new sequence
  */
 private SequentialPattern appendItemToSequence(SequentialPattern prefix, Integer item) {
   SequentialPattern newPrefix = prefix.cloneSequence(); // isSuffix
   newPrefix.addItemset(new Itemset(item));
   return newPrefix;
 }
Example #3
0
  /**
   * This is the main method for the BIDE+ algorithm.
   *
   * @param database a sequence database
   * @throws IOException exception if some error occurs while writing the output file.
   */
  private void bide(SequenceDatabase database, String outputFilePath) throws IOException {
    // if the user want to keep the result into memory
    if (outputFilePath == null) {
      writer = null;
      patterns = new SequentialPatterns("FREQUENT SEQUENTIAL PATTERNS");
    } else { // if the user want to save the result to a file
      patterns = null;
      writer = new BufferedWriter(new FileWriter(outputFilePath));
    }

    // The algorithm first scan the database to find all frequent items
    // The algorithm note the sequences in which these items appear.
    // This is stored in a map:  Key: item  Value : IDs of sequences containing the item
    Map<Integer, Set<Integer>> mapSequenceID = findSequencesContainingItems(database);

    // WE CONVERT THE DATABASE TO A PSEUDO-DATABASE, AND REMOVE
    // THE ITEMS OF SIZE 1 THAT ARE NOT FREQUENT, SO THAT THE ALGORITHM
    // WILL NOT CONSIDER THEM ANYMORE.

    // OPTIMIZATION Create COOC MAP
    //		coocMapBefore = new HashMap<Integer, Map<Integer,
    // Integer>>(mapSequenceID.entrySet().size());

    // we create a database
    initialDatabase = new ArrayList<PseudoSequenceBIDE>();
    // for each sequence of the original database
    for (Sequence sequence : database.getSequences()) {
      // we make a copy of the sequence while removing infrequent items
      Sequence optimizedSequence = sequence.cloneSequenceMinusItems(mapSequenceID, minsuppAbsolute);
      if (optimizedSequence.size() != 0) {
        // if this sequence has size >0, we add it to the new database
        initialDatabase.add(new PseudoSequenceBIDE(optimizedSequence, 0, 0));
      }

      //			// update COOC map
      //			HashSet<Integer> alreadySeen = new HashSet<Integer>();
      //			for(List<Integer> itemset : optimizedSequence.getItemsets()) {
      //				for(Integer item : itemset) {
      //					Map<Integer, Integer> mapCoocItem = coocMapBefore.get(item);
      //					if(mapCoocItem == null) {
      //						mapCoocItem = new HashMap<Integer, Integer>();
      //						coocMapBefore.put(item, mapCoocItem);
      //					}
      //					for(Integer itemSeen : alreadySeen) {
      //						if(itemSeen != item) {
      //							Integer frequency = mapCoocItem.get(itemSeen);
      //							if(frequency == null) {
      //								mapCoocItem.put(itemSeen, 1);
      //							}else {
      //								mapCoocItem.put(itemSeen, frequency+1);
      //							}
      //						}
      //					}
      //					alreadySeen.add(item);
      //				}
      //			}
    }

    // For each frequent item
    loop1:
    for (Entry<Integer, Set<Integer>> entry : mapSequenceID.entrySet()) {
      // if the item is frequent
      if (entry.getValue().size() >= minsuppAbsolute) {
        //				Map<Integer, Integer> mapCoocItem = coocMapBefore.get(entry.getKey());
        //				if(mapCoocItem != null) {
        //					for(Integer supportCoocBefore : mapCoocItem.values()) {
        //						if(supportCoocBefore >= entry.getValue().size()) {
        //							continue loop1;
        //						}
        //					}
        //				}

        // build the projected database with this item
        Integer item = entry.getKey();
        List<PseudoSequenceBIDE> projectedContext =
            buildProjectedContextSingleItem(item, initialDatabase, false, entry.getValue());

        // Create the prefix with this item
        SequentialPattern prefix = new SequentialPattern();
        prefix.addItemset(new Itemset(item));
        // set the sequence IDS of this prefix
        prefix.setSequenceIDs(entry.getValue());

        // variable to store the largest support of patterns
        // that will be found starting with this prefix
        if (projectedContext.size() >= minsuppAbsolute) {
          int successorSupport = 0;

          if (!checkBackScanPruning(prefix, entry.getValue())) {
            successorSupport = recursion(prefix, projectedContext); // récursion;
          }

          // Finally, because this prefix has support > minsup
          // and passed the backscan pruning,
          // we check if it has no sucessor with support >= minsup
          // (a forward extension)
          // IF no forward extension
          if (successorSupport != entry.getValue().size()) { // ######### MODIFICATION ####
            // IF there is also no backward extension
            if (!checkBackwardExtension(prefix, entry.getValue())) {
              // the pattern is closed and we save it
              savePattern(prefix);
            }
          }
        } else {
          if (!checkBackwardExtension(prefix, entry.getValue())) {
            // the pattern is closed and we save it
            savePattern(prefix);
          }
        }
      }
    }
    // check the memory usage for statistics
    MemoryLogger.getInstance().checkMemory();
  }
 /**
  * This method creates a copy of the sequence and add a given item as a new itemset to the
  * sequence. It sets the support of the sequence as the support of the item.
  *
  * @param prefix the sequence
  * @param item the item
  * @return the new sequence
  */
 private SequentialPattern appendItemToSequence(SequentialPattern prefix, String item) {
   SequentialPattern newPrefix = prefix.cloneSequence();
   newPrefix.addItemset(new Itemset(item));
   return newPrefix;
 }