/**
 * Creates the per-string construction state for {@code str}.
 *
 * <p>All counters start at zero and {@code nextNode} starts at {@code root}.
 * When {@code str.getIndex() > 0} (presumably: a string added after the first
 * one -- confirm against TreeString), the string is first matched against the
 * existing tree via {@code findEdge}; the position reached by that match
 * becomes both {@code nextPhaseStart} and {@code lastE}. Otherwise a fresh
 * {@code EdgeMatch} covering {@code [0, string.length()]} is used and the
 * counters stay at zero.
 *
 * @param str the string this state tracks
 */
public UkkonenState(TreeString str) {
    string = str;
    lastE = 0;
    edgesWithE = new LinkedList<TreeEdge>();
    nextPhaseStart = 0;
    nextExtStart = 0;
    nextNode = root;
    gammaLength = 0;
    rule2Node = null;

    if (string.getIndex() > 0) {
        // Walk the existing tree as far as this string already matches it.
        matcher = findEdge(root, string, 0, string.length(), false);
        nextPhaseStart = matcher.matchedTo;
        lastE = matcher.matchedTo;
        // nextExtStart intentionally stays at 0 (assigned above).

        // Only build the message when FINEST is enabled; formatting and
        // string.toString() are otherwise wasted work.
        if (logger.isLoggable(Level.FINEST)) {
            logger.log(
                    Level.FINEST,
                    String.format(
                            "String %s can start at phase %d (E:%d)",
                            string.toString(), nextPhaseStart, lastE));
        }
    } else {
        matcher = new EdgeMatch(string, 0, string.length());
    }

    currentSuffix = new StringSuffix(string, 0);
}
private void naiveExtendSuffixTree(int arrayIdx) { TreeString string = strings.get(arrayIdx); // the array.length-1 constraint, instead of array.length, is because // we assume that the terminal character has already been added to the // string, and we don't want to *just* add the suffix that is that // character. for (int i = 0; i <= string.length(); i++) { logger.log( Level.FINEST, String.format("Naive Extension: \"%s\"", string.substring(i, string.length() + 1))); naiveExtendSuffix(string, i); } }
/**
 * Renders this suffix as text by concatenating {@code string.getChar(i)} for
 * every index {@code i} from {@code offset} through {@code string.length()},
 * inclusive.
 *
 * @return the concatenated characters; empty if {@code offset} is already
 *         greater than {@code string.length()}
 */
public String getSuffixString() {
    final StringBuilder text = new StringBuilder();
    int pos = offset;
    while (pos <= string.length()) {
        text.append(string.getChar(pos));
        pos++;
    }
    return text.toString();
}
/**
 * Inserts the suffix of {@code string} beginning at {@code start} into the
 * tree and records it on the tail node of the edge that ends the suffix.
 *
 * <p>The suffix is first matched from {@code root} with {@code findEdge}:
 * <ul>
 *   <li>complete match: the last matched edge is reused as the leaf edge;</li>
 *   <li>no edge matched at all: a new edge for {@code [start, string.length()]}
 *       is hung directly off {@code root};</li>
 *   <li>partial match: a new edge for {@code [matchedTo, string.length()]} is
 *       created, the last matched edge is split if the match stopped inside
 *       it, and the new edge is added under that edge's tail node.</li>
 * </ul>
 *
 * @param string the string whose suffix is being inserted
 * @param start  index in {@code string} where the suffix begins
 */
private void naiveExtendSuffix(TreeString string, int start) {
    EdgeMatch match = findEdge(root, string, start, string.length(), false);
    StringSuffix suffix = new StringSuffix(string, start);

    TreeEdge leaf;
    if (match.completedMatch()) {
        leaf = match.lastEdge;
    } else if (match.lastEdge == null) {
        leaf = new TreeEdge(string, start, string.length(), root);
        root.addEdge(leaf);
    } else {
        // NOTE(review): the leaf is constructed with lastEdge.tailNode as read
        // *before* any split, but attached to lastEdge.tailNode as read
        // *after* it. This ordering is kept exactly as-is; whether the two
        // nodes can differ depends on TreeEdge.split -- confirm.
        leaf = new TreeEdge(string, match.matchedTo, string.length(), match.lastEdge.tailNode);
        if (match.inEdgeMiddle()) {
            int splitAt = match.lastMatchLength();
            match.lastEdge.split(splitAt);
        }
        match.lastEdge.tailNode.addEdge(leaf);
    }

    leaf.tailNode.suffixes.add(suffix);
}
/**
 * Reports whether {@code start} equals {@code string.length()}.
 * Presumably that index is where the terminal character sits -- confirm
 * against TreeString.
 *
 * @return {@code true} if {@code start == string.length()}, otherwise {@code false}
 */
public boolean isTerminal() {
    final int terminalIndex = string.length();
    return start == terminalIndex;
}