/** * For each item, calculate the sequence id of sequences containing that item * * @param database the current sequence database * @return Map of items to sequence IDs that contains each item */ private Map<Integer, Set<Integer>> findSequencesContainingItems(SequenceDatabase database) { // We use a map to store the sequence IDs where an item appear // Key : item Value : a set of sequence IDs Map<Integer, Set<Integer>> mapSequenceID = new HashMap< Integer, Set< Integer>>(); // pour conserver les ID des séquences: <Id Item, Set d'id de // séquences> // for each sequence for (Sequence sequence : database.getSequences()) { // for each itemset in that sequence for (List<Integer> itemset : sequence.getItemsets()) { // for each item for (Integer item : itemset) { // get the set of sequence ids for that item Set<Integer> sequenceIDs = mapSequenceID.get(item); if (sequenceIDs == null) { // if null create a new set sequenceIDs = new HashSet<Integer>(); mapSequenceID.put(item, sequenceIDs); } // add the current sequence id to this set sequenceIDs.add(sequence.getId()); } } } return mapSequenceID; }
/** * This is the main method for the MaxSP algorithm. * * @param database a sequence database * @throws IOException exception if some error occurs while writing the output file. */ private void maxSP(SequenceDatabase database, String outputFilePath) throws IOException { // if the user want to keep the result into memory if (outputFilePath == null) { writer = null; patterns = new SequentialPatterns("FREQUENT SEQUENTIAL PATTERNS"); } else { // if the user want to save the result to a file patterns = null; writer = new BufferedWriter(new FileWriter(outputFilePath)); } // The algorithm first scan the database to find all frequent items // The algorithm note the sequences in which these items appear. // This is stored in a map: Key: item Value : IDs of sequences containing the item Map<Integer, Set<Integer>> mapSequenceID = findSequencesContainingItems(database); // WE CONVERT THE DATABASE TO A PSEUDO-DATABASE, AND REMOVE // THE ITEMS OF SIZE 1 THAT ARE NOT FREQUENT, SO THAT THE ALGORITHM // WILL NOT CONSIDER THEM ANYMORE. // we create a database initialDatabase = new HashMap<Integer, PseudoSequenceBIDE>(); // for each sequence of the original database for (Sequence sequence : database.getSequences()) { // we make a copy of the sequence while removing infrequent items Sequence optimizedSequence = sequence.cloneSequenceMinusItems(mapSequenceID, minsuppAbsolute); if (optimizedSequence.size() != 0) { // if this sequence has size >0, we add it to the new database initialDatabase.put(sequence.getId(), new PseudoSequenceBIDE(optimizedSequence, 0, 0)); } } // For each frequent item loop1: for (Entry<Integer, Set<Integer>> entry : mapSequenceID.entrySet()) { // if the item is frequent if (entry.getValue().size() >= minsuppAbsolute) { // build the projected database with this item Integer item = entry.getKey(); List<PseudoSequenceBIDE> projectedContext = buildProjectedContextSingleItem(item, initialDatabase, false, entry.getValue()); // Create the prefix with this item SequentialPattern prefix = new SequentialPattern(); prefix.addItemset(new Itemset(item)); // set the sequence IDS of this prefix prefix.setSequenceIDs(entry.getValue()); // variable to store the largest support of patterns // that will be found starting with this prefix if (projectedContext.size() >= minsuppAbsolute) { int successorSupport = 0; // if(!checkBackScanPruning(prefix, entry.getValue())) { successorSupport = recursion(prefix, projectedContext); // récursion; // } // else // { // System.out.println("back scan pruned " + prefix); // } // Finally, because this prefix has support > minsup // and passed the backscan pruning, // we check if it has no sucessor with support >= minsup // (a forward extension) // IF no forward extension if (successorSupport < minsuppAbsolute) { // ######### MODIFICATION #### // IF there is also no backward extension if (!checkBackwardExtension(prefix, entry.getValue())) { // the pattern is closed and we save it savePattern(prefix); } } } else { if (!checkBackwardExtension(prefix, entry.getValue())) { // the pattern is closed and we save it savePattern(prefix); } } } } // check the memory usage for statistics MemoryLogger.getInstance().checkMemory(); }