/** Returns space-separated words that constitute this phrase. */
public CharSequence getPhrase(int index) {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < wordIndices[index].length; i++) {
    if (i > 0) sb.append(" ");
    sb.append(new String(allWords.image[wordIndices[index][i]]));
  }
  return sb;
}
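// Illustrative only (hypothetical word images, not part of the original source): assuming the
// wordIndices[index] entries point at the word images "data" and "mining" in allWords,
// getPhrase(index) returns the CharSequence "data mining": word images joined by single
// spaces, with no trailing separator.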
/** For debugging purposes. */
@Override
public String toString() {
  StringWriter sw = new StringWriter();

  TabularOutput t = new TabularOutput(sw);
  t.flushEvery(Integer.MAX_VALUE);
  t.addColumn("#");
  t.addColumn("token").alignLeft();
  t.addColumn("type");
  t.addColumn("fieldIndex");
  t.addColumn("=>field").alignLeft();
  t.addColumn("docIdx");
  t.addColumn("wordIdx");
  t.addColumn("=>word").alignLeft();

  for (int i = 0; i < image.length; i++, t.nextRow()) {
    t.rowData(
        i,
        image[i] == null ? "<null>" : new String(image[i]),
        type[i],
        fieldIndex[i],
        fieldIndex[i] >= 0 ? allFields.name[fieldIndex[i]] : null,
        documentIndex[i],
        wordIndex[i],
        wordIndex[i] >= 0 ? new String(allWords.image[wordIndex[i]]) : null);
  }

  if (suffixOrder != null) {
    // Flush the buffered token table before reusing the writer for the suffix/LCP table;
    // otherwise its rows (buffered by flushEvery(Integer.MAX_VALUE)) would be lost.
    t.flush();

    t = new TabularOutput(sw);
    t.addColumn("#");
    t.addColumn("sa");
    t.addColumn("lcp");
    t.addColumn("=>words").alignLeft();
    sw.append("\n");

    final StringBuilder suffixImage = new StringBuilder();
    for (int i = 0; i < suffixOrder.length; i++, t.nextRow()) {
      t.rowData(i, suffixOrder[i], lcp[i]);

      // Preview up to windowLength words starting at this suffix; mark truncation with " [...]".
      int windowLength = 5;
      for (int j = suffixOrder[i],
              max = Math.min(suffixOrder[i] + windowLength, wordIndex.length);
          j < max; ) {
        suffixImage
            .append(wordIndex[j] >= 0 ? new String(allWords.image[wordIndex[j]]) : "|")
            .append(" ");
        if (++j == max && j != wordIndex.length) suffixImage.append(" [...]");
      }
      t.rowData(suffixImage.toString());
      suffixImage.setLength(0);
    }
    sw.append("\n");
  }

  t.flush();
  sw.append("\n");
  return sw.toString();
}
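// Illustrative note (not in the original source): this dump is intended for ad-hoc inspection,
// e.g. logging this object's toString() while stepping through preprocessing. The first table
// has one row per token (image, type, source field, document index, word index); the second
// table, emitted only when suffixOrder and lcp are available, lists the suffix array with a
// preview window of up to five words per suffix.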
/**
 * Build a cluster's label from suffix tree edge indices, including some debugging and diagnostic
 * information.
 */
@SuppressWarnings("unused")
private String buildDebugLabel(int[] phraseIndices) {
  final StringBuilder b = new StringBuilder();

  String sep = "";
  int k = 0;
  final short[] tokenTypes = context.allWords.type;
  for (int i = 0; i < phraseIndices.length; i += 2) {
    for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; j++, k++) {
      b.append(sep);

      final int termIndex = sb.input.get(j);
      b.append(context.allWords.image[termIndex]);
      if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) b.append("[S]");
      sep = " ";
    }
    sep = "_";
  }

  return b.toString();
}
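// Illustrative only (hypothetical inputs, not part of the original source): for two edge ranges
// whose tokens resolve to the words ("data", "mining") and ("of", "text"), with "of" classified
// as a common word, buildDebugLabel would return "data mining_of[S] text": words within one edge
// are separated by spaces, consecutive edges by "_", and common (stop) words are flagged with "[S]".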