/** * Method to find all frequent items in a projected sequence database * * @param sequences the set of sequences * @return A list of pairs, where a pair is an item with (1) a boolean indicating if it is in an * itemset that is "cut" and (2) the sequence IDs where it occurs. */ protected Set<Pair> findAllFrequentPairs( SequentialPattern prefix, List<PseudoSequence> sequences) { // We use a Map the store the pairs. Map<Pair, Pair> mapPairs = new HashMap<Pair, Pair>(); // for each sequence for (PseudoSequence sequence : sequences) { // for each itemset for (int i = 0; i < sequence.size(); i++) { // for each item for (int j = 0; j < sequence.getSizeOfItemsetAt(i); j++) { String item = sequence.getItemAtInItemsetAt(j, i); // create the pair corresponding to this item Pair paire = new Pair(sequence.isPostfix(i), item); // false is ok? // get the pair object store in the map if there is one already Pair oldPaire = mapPairs.get(paire); // if there is no pair object yet if (oldPaire == null) { // store the pair object that we created mapPairs.put(paire, paire); } else { // otherwise use the old one paire = oldPaire; } // record the current sequence id for that pair paire.getSequencesID().add(sequence.getId()); } } } MemoryLogger.getInstance().checkMemory(); // check the memory for statistics. // return the map of pairs return mapPairs.keySet(); }
/** * Create a projected database by pseudo-projection * * @param item The item to use to make the pseudo-projection * @param context The current database. * @param inSuffix This boolean indicates if the item "item" is part of a suffix or not. * @return the projected database. */ private List<PseudoSequence> buildProjectedContext( String item, List<PseudoSequence> database, boolean inSuffix) { // We create a new projected database List<PseudoSequence> sequenceDatabase = new ArrayList<PseudoSequence>(); // for each sequence in the database received as parameter for (PseudoSequence sequence : database) { // for each sequence for (int i = 0; i < sequence.size(); i++) { // for each item of the sequence // check if the itemset contains the item that we use for the projection int index = sequence.indexOf(i, item); // if it does not, and the current item is part of a suffix if inSuffix is true // and vice-versa if (index != -1 && sequence.isPostfix(i) == inSuffix) { // if this is not the last item of the itemset of this sequence if (index != sequence.getSizeOfItemsetAt(i) - 1) { // if this is not the last item of the itemset // create a new pseudo sequence PseudoSequence newSequence = new PseudoSequence(sequence, i, index + 1); if (newSequence.size() > 0) { sequenceDatabase.add(newSequence); } } else if ((i != sequence.size() - 1)) { // if this is not the last itemset of the sequence // create a new pseudo sequence PseudoSequence newSequence = new PseudoSequence(sequence, i + 1, 0); if (newSequence.size() > 0) { // if the size of this pseudo sequence is greater than 0 // add it to the projected database. sequenceDatabase.add(newSequence); } } } } } return sequenceDatabase; // return the projected database }