/**
 * Inserts the suffix of {@code string} that begins at {@code start} using the naive
 * (match-from-the-root) strategy, then records the suffix on the tail node of the edge
 * where it ends.
 *
 * <p>The suffix is matched from {@code root} with {@code findEdge}. Three outcomes are handled:
 * <ul>
 *   <li>the whole suffix already matched: the last matched edge is reused as the leaf edge;</li>
 *   <li>nothing matched ({@code em.lastEdge == null}): a new edge covering
 *       {@code [start, string.length())} is hung directly off the root;</li>
 *   <li>the match stopped part-way: the last edge is split if the match ended in its middle,
 *       and a new edge covering {@code [em.matchedTo, string.length())} is added below it.</li>
 * </ul>
 *
 * <p>NOTE(review): if the match is reported as complete while {@code em.lastEdge} is
 * {@code null}, the final line dereferences {@code null}. Presumably callers never pass
 * {@code start == string.length()}; confirm.
 *
 * @param string the string whose suffix is being inserted
 * @param start  index in {@code string} at which the suffix begins
 */
private void naiveExtendSuffix(TreeString string, int start) {
    EdgeMatch em = findEdge(root, string, start, string.length(), false);
    StringSuffix stringSuffix = new StringSuffix(string, start);

    TreeEdge leafEdge;
    if (em.completedMatch()) {
        // The suffix is already spelled out in the tree; reuse the edge it ends on.
        leafEdge = em.lastEdge;
    } else if (em.lastEdge == null) {
        // No edge was matched, so the new leaf edge hangs directly off the root.
        leafEdge = new TreeEdge(string, start, string.length(), root);
        root.addEdge(leafEdge);
    } else {
        // Split BEFORE building the leaf edge. ukkonenSEA uses the same order and treats
        // em.lastEdge.tailNode after a split as the newly created internal node, so the
        // edge's head must be read after the split to be the node the edge is attached to.
        if (em.inEdgeMiddle()) {
            em.lastEdge.split(em.lastMatchLength());
        }
        leafEdge = new TreeEdge(string, em.matchedTo, string.length(), em.lastEdge.tailNode);
        em.lastEdge.tailNode.addEdge(leafEdge);
    }

    leafEdge.tailNode.suffixes.add(stringSuffix);
}
/* * ukkonenSEA(i, j) performs extension j of phase i of Ukkonen's algorithm. * This means that we're making sure that array[j,i] (note the inclusivity!) * is a part of the current suffix tree. * * Original Description: pg. 100 of Gusfield */ private boolean ukkonenSEA(int i, int j) { logger.exiting("UkkonenSuffixTree", "ukkonenSEA"); logger.log(Level.FINEST, String.format("j=%d", j)); assert j <= i; boolean rule3 = false; TreeNode newRule2Node = null; EdgeMatch m = extState.matcher; char lastChar = extState.string.getChar(i); boolean lastCharIsTerminal = isTerminal(lastChar); /* * SEA Step 1: * "Find the first node v at or above the end of S[j-1,i] that either * has a suffix link from it or is the root. This requires walking up * at most one edge from the end of S[j-1,i] in the current tree. Let * \gamma (possibly empty) denote the string between v and the * end of S[j-1,i]." */ /* * SEA Step 2: * "If v is not the root, traverse the suffix link from v to node * s(v) and then walk down from s(v) following the path for string * gamma. If v is the root, then follow the path for S[j,i] from the * root (as in the naive algorithm)." */ int gammaEnd = i; int gammaStart = gammaEnd - extState.gammaLength; if (extState.nextNode == null || extState.nextNode.isRoot()) { String beta = extState.string.substring(j, i); logger.log(Level.FINEST, String.format("beta: %d,%d <%s>%c", j, i, beta, lastChar)); m.reset(j, i); m.matchFrom(root, true); } else { logger.log(Level.FINEST, String.format("gammaLength:%d", extState.gammaLength)); String gamma = extState.string.substring(gammaStart, gammaEnd); logger.log( Level.FINEST, String.format("gamma: %d,%d <%s>%c", gammaStart, gammaEnd, gamma, lastChar)); m.reset(gammaStart, gammaEnd); m.matchFrom(extState.nextNode, true); } /* * SEA Step 3: * "Using the extension rules, ensure that the string S[j,i]S(i+1) is * in the tree." * * In our coordinates, this is array[j,i)+array[i] * \beta = array[j,i) * * Rule 1: the path \beta ends at a leaf. 
(we shouldn't see this case). * Rule 2: the path \beta is not continued by array[i]. That is, \beta * ends either at a node (in which case, no child of the node * starts with array[i]), or in an edge (in which case, the edge * doesn't continue with array[i]). Either way, we create a new * edge that is labeled with array[i] (coordinates: [i,i+1) ). * Rule 3: \beta+array[i] is already in the tree -- either \beta ends in * an edge that continues with array[i], or at a node that has * a child under array[i]. Either way, return false (break!). */ TreeEdge newEdge = null; if (m.lastEdge == null) { logger.log(Level.FINEST, String.format("Found root.")); // the \beta string matched to the root (was empty). So we need // to simply check the children of the root. boolean foundLastChar = !lastCharIsTerminal ? root.childEdges.containsKey(lastChar) : root.terminalEdges.containsKey(extState.string.getIndex()); if (foundLastChar) { // Rule 3 rule3 = true; logger.log(Level.FINEST, "Rule #3, Root"); extState.nextNode = null; extState.gammaLength = 0; logger.log(Level.FINEST, String.format("nextGamma: %d", extState.gammaLength)); } else { // Rule 2 logger.log(Level.FINEST, "Rule #2, Root"); newEdge = new TreeEdge(extState.string, i, null, root); root.addEdge(newEdge); extState.nextNode = null; extState.gammaLength = 0; logger.log(Level.FINEST, String.format("nextGamma: %d", extState.gammaLength)); } } else if (m.inEdgeMiddle()) { int offset = m.lastMatchLength(); logger.log(Level.FINEST, String.format("Found edge middle: %d", offset)); boolean foundLastChar = !lastCharIsTerminal ? 
m.lastEdge.getChar(offset) == lastChar : (m.lastEdge.string.getIndex() == extState.string.getIndex() && offset == m.lastEdge.length() - 1); logger.log(Level.FINEST, String.format("foundLastChar: %s", foundLastChar)); if (foundLastChar) { // Rule 3 rule3 = true; logger.log(Level.FINEST, "Rule #3, Edge"); extState.nextNode = m.lastEdge.headNode; // extState.gammaLength = m.lastMatchLength() + 1; extState.gammaLength = m.lastMatchLength() + (j == i ? 1 : 0); assert extState.gammaLength >= 0; logger.log(Level.FINEST, String.format("nextGamma: %d", extState.gammaLength)); } else { // Rule 2 logger.log(Level.FINEST, "Rule #2, Edge"); TreeEdge newLowerEdge = m.lastEdge.split(offset); extState.edgesWithE.add(newLowerEdge); newEdge = new TreeEdge(extState.string, i, null, m.lastEdge.tailNode); m.lastEdge.tailNode.addEdge(newEdge); newRule2Node = m.lastEdge.tailNode; extState.nextNode = m.lastEdge.headNode; // extState.gammaLength = m.lastEdge.length() + 1; extState.gammaLength = m.lastEdge.length() + (j == i ? 1 : 0); assert extState.gammaLength >= 0; logger.log(Level.FINEST, String.format("nextGamma: %d", extState.gammaLength)); } if (extState.nextNode.suffixLink == null && !extState.nextNode.isRoot()) { logger.log( Level.FINEST, String.format("Walking up edge: %d", extState.nextNode.parentEdge.length())); extState.gammaLength += extState.nextNode.parentEdge.length(); extState.nextNode = extState.nextNode.parentEdge.headNode; } } else { logger.log(Level.FINEST, String.format("Found node.")); boolean foundLastChar = !lastCharIsTerminal ? m.lastEdge.tailNode.childEdges.containsKey(lastChar) : m.lastEdge.tailNode.terminalEdges.containsKey(extState.string.getIndex()); logger.log(Level.FINEST, String.format("foundLastChar: %s", foundLastChar)); if (foundLastChar) { // Rule 3 rule3 = true; logger.log(Level.FINEST, "Rule #3, Node"); extState.nextNode = m.lastEdge.headNode; // extState.gammaLength = m.lastEdge.length() + 1; extState.gammaLength = m.lastEdge.length() + (j == i ? 
1 : 0); assert extState.gammaLength >= 0; logger.log(Level.FINEST, String.format("nextGamma: %d", extState.gammaLength)); } else { // Rule 2 logger.log(Level.FINEST, "Rule #2, Node"); newEdge = new TreeEdge(extState.string, i, null, m.lastEdge.tailNode); m.lastEdge.tailNode.addEdge(newEdge); extState.nextNode = m.lastEdge.headNode; // extState.gammaLength = m.lastEdge.length() + 1; extState.gammaLength = m.lastEdge.length() + (j == i ? 1 : 0); logger.log(Level.FINEST, String.format("nextGamma: %d", extState.gammaLength)); assert extState.gammaLength >= 0; } if (extState.nextNode.suffixLink == null && !extState.nextNode.isRoot()) { logger.log( Level.FINEST, String.format("Walking up edge: %d", extState.nextNode.parentEdge.length())); extState.gammaLength += extState.nextNode.parentEdge.length(); extState.nextNode = extState.nextNode.parentEdge.headNode; } } if (extState.nextNode != null) { logger.log(Level.FINEST, "Following suffix link."); extState.nextNode = extState.nextNode.suffixLink; } else { logger.log(Level.FINEST, "Suffix link not found."); } if (newEdge != null) { newEdge.tailNode.suffixes.add(extState.currentSuffix); extState.nextSuffix(); extState.edgesWithE.add(newEdge); logger.log(Level.FINEST, String.format("Added suffix: %d", j)); } /* * SEA Step 4: * "If a new internal node w was created in extension j-1 (by extension rule 2) * then by Lemma 6.1.1 string alpha must end at node s(w), the end node for the * suffix link from w. Create the suffix link (w, s(w)) from w to s(w)." * * This wording is confusing -- is there a typo in Gusfield? I'm not sure where * the 'w' comes from. */ if (extState.rule2Node != null) { if (m.lastEdge != null) { extState.rule2Node.suffixLink = m.lastEdge.tailNode; logger.log(Level.FINEST, "Adding suffix link --> internal node."); } else { extState.rule2Node.suffixLink = root; logger.log(Level.FINEST, "Adding suffix link --> root."); } } /* * Update any state that will be needed in the next extension. 
*/ extState.rule2Node = newRule2Node; logger.exiting("UkkonenSuffixTree", "ukkonenSEA"); // "Rule 3 is a show stopper" means that, if we encounter rule 3, // we *don't* continue. return !rule3; }