/** * Method to recursively grow a given sequential pattern. * * @param prefix the current sequential pattern that we want to try to grow * @param database the current projected sequence database * @throws IOException exception if there is an error writing to the output file */ private void recursion(SequentialPattern prefix, List<PseudoSequence> database) throws IOException { // find frequent items of size 1 in the current projected database. Set<Pair> pairs = findAllFrequentPairs(prefix, database); // For each pair found (a pair is an item with a boolean indicating if it // appears in an itemset that is cut (a postfix) or not, and the sequence IDs // where it appears in the projected database). for (Pair pair : pairs) { // if the item is frequent in the current projected database if (pair.getCount() >= minsuppAbsolute) { // create the new postfix by appending this item to the prefix SequentialPattern newPrefix; // if the item is part of a postfix if (pair.isPostfix()) { // we append it to the last itemset of the prefix newPrefix = appendItemToPrefixOfSequence(prefix, pair.getItem()); } else { // else, we append it as a new itemset to the sequence newPrefix = appendItemToSequence(prefix, pair.getItem()); } // build the projected database with this item List<PseudoSequence> projectedDB = buildProjectedContext(pair.getItem(), database, pair.isPostfix()); newPrefix.setSequencesID(pair.getSequencesID()); // save the pattern savePattern(newPrefix); // make a recursive call recursion(newPrefix, projectedDB); } } MemoryLogger.getInstance().checkMemory(); }
/** * This is the main method for the PrefixSpan algorithm that is called to start the algorithm * * @param outputFilePath an output file path if the result should be saved to a file or null if * the result should be saved to memory. * @param database a sequence database * @throws IOException exception if an error while writing the output file */ private void prefixSpan(SequenceDatabase database, String outputFilePath) throws IOException { // if the user want to keep the result into memory if (outputFilePath == null) { writer = null; patterns = new SequentialPatterns("FREQUENT SEQUENTIAL PATTERNS"); } else { // if the user want to save the result to a file patterns = null; writer = new BufferedWriter(new FileWriter(outputFilePath)); } // We have to scan the database to find all frequent patterns of size 1. // We note the sequences in which these patterns appear. Map<String, Set<Integer>> mapSequenceID = findSequencesContainingItems(database); // WE CONVERT THE DATABASE ITON A PSEUDO-DATABASE, AND REMOVE // THE ITEMS OF SIZE 1 THAT ARE NOT FREQUENT, SO THAT THE ALGORITHM // WILL NOT CONSIDER THEM ANYMORE. (OPTIMIZATION : OCTOBER-08 ) // Create a list of pseudosequence List<PseudoSequence> initialContext = new ArrayList<PseudoSequence>(); // for each sequence in the database for (Sequence sequence : database.getSequences()) { // remove infrequent items Sequence optimizedSequence = sequence.cloneSequenceMinusItems(mapSequenceID, minsuppAbsolute); if (optimizedSequence.size() != 0) { // if the size is > 0, create a pseudo sequence with this sequence initialContext.add(new PseudoSequence(optimizedSequence, 0, 0)); } } // For each item for (Entry<String, Set<Integer>> entry : mapSequenceID.entrySet()) { // if the item is frequent (has a support >= minsup) if (entry.getValue().size() >= minsuppAbsolute) { // if the item is frequent // build the projected context String item = entry.getKey(); List<PseudoSequence> projectedContext = buildProjectedContext(item, initialContext, false); // Create the prefix for the projected context. SequentialPattern prefix = new SequentialPattern(0); prefix.addItemset(new Itemset(item)); prefix.setSequencesID(entry.getValue()); // The prefix is a frequent sequential pattern. // We save it in the result. savePattern(prefix); // we found a sequence. // Recursive call ! recursion(prefix, projectedContext); } } }