Example #1
0
 /**
  * Assembles the text of a single phrase by joining the images of its words with single spaces.
  *
  * @param index position of the phrase in {@code wordIndices}
  * @return the words of the phrase, separated by single spaces
  */
 public CharSequence getPhrase(int index) {
   final StringBuilder phrase = new StringBuilder();
   // Empty before the first word, a single space before every following one.
   String separator = "";
   for (int position = 0; position < wordIndices[index].length; position++) {
     phrase.append(separator);
     phrase.append(new String(allWords.image[wordIndices[index][position]]));
     separator = " ";
   }
   return phrase;
 }
Example #2
0
    /**
     * Renders the token arrays as text tables, for debugging purposes.
     *
     * <p>The first table has one row per entry of {@code image}: the row number, the token image
     * ({@code "<null>"} when the image is {@code null}), its type, field index and resolved field
     * name, document index, word index and resolved word image. Field names and word images are
     * only resolved for non-negative indices.
     *
     * <p>When {@code suffixOrder} is not {@code null}, a second table follows with one row per
     * suffix: the row number, the suffix start taken from {@code suffixOrder}, the matching
     * {@code lcp} value and a preview of up to five words starting at the suffix. Negative word
     * indices are shown as {@code "|"} in the preview.
     *
     * @return the formatted tables
     */
    @Override
    public String toString() {
      StringWriter sw = new StringWriter();
      TabularOutput t = new TabularOutput(sw);
      t.flushEvery(Integer.MAX_VALUE);

      t.addColumn("#");
      t.addColumn("token").alignLeft();
      t.addColumn("type");
      t.addColumn("fieldIndex");
      t.addColumn("=>field").alignLeft();
      t.addColumn("docIdx");
      t.addColumn("wordIdx");
      t.addColumn("=>word").alignLeft();

      for (int i = 0; i < image.length; i++, t.nextRow()) {
        t.rowData(
            i,
            image[i] == null ? "<null>" : new String(image[i]),
            type[i],
            fieldIndex[i],
            fieldIndex[i] >= 0 ? allFields.name[fieldIndex[i]] : null,
            documentIndex[i],
            wordIndex[i],
            wordIndex[i] >= 0 ? new String(allWords.image[wordIndex[i]]) : null);
      }

      if (suffixOrder != null) {
        // Write out the token table before its TabularOutput is replaced. Periodic flushing was
        // turned off above (flushEvery(Integer.MAX_VALUE)), so without this call the rows
        // collected so far would never be written to sw.
        t.flush();

        t = new TabularOutput(sw);
        t.addColumn("#");
        t.addColumn("sa");
        t.addColumn("lcp");
        t.addColumn("=>words").alignLeft();

        sw.append("\n");
        // Maximum number of words shown in the preview of each suffix.
        final int windowLength = 5;
        final StringBuilder suffixImage = new StringBuilder();
        for (int i = 0; i < suffixOrder.length; i++, t.nextRow()) {
          t.rowData(i, suffixOrder[i], lcp[i]);

          for (int j = suffixOrder[i],
                  max = Math.min(suffixOrder[i] + windowLength, wordIndex.length);
              j < max; ) {
            suffixImage
                .append(wordIndex[j] >= 0 ? new String(allWords.image[wordIndex[j]]) : "|")
                .append(" ");
            // j is advanced here; the marker is added only when the preview was cut short
            // before the end of wordIndex.
            if (++j == max && j != wordIndex.length) suffixImage.append(" [...]");
          }
          t.rowData(suffixImage.toString());
          // Reuse the same buffer for the next row.
          suffixImage.setLength(0);
        }
        sw.append("\n");
      }

      t.flush();
      sw.append("\n");
      return sw.toString();
    }
  /**
   * Build a cluster's label from suffix tree edge indices, including some debugging and diagnostic
   * information.
   *
   * <p>{@code phraseIndices} is read as consecutive pairs of inclusive start and end positions
   * into {@code sb.input}. Words inside one pair are separated by a space, consecutive pairs are
   * separated by an underscore, and every word whose token type is reported as common by {@code
   * TokenTypeUtils.isCommon} is followed by {@code [S]}.
   *
   * @param phraseIndices inclusive (start, end) position pairs; an even number of elements is
   *     expected, since the element following each start is always read
   * @return the assembled debug label
   */
  @SuppressWarnings("unused")
  private String buildDebugLabel(int[] phraseIndices) {
    final StringBuilder b = new StringBuilder();

    // Nothing before the very first word; switched to " " or "_" as words and pairs are emitted.
    String sep = "";
    final short[] tokenTypes = context.allWords.type;
    for (int i = 0; i < phraseIndices.length; i += 2) {
      for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; j++) {
        b.append(sep);

        final int termIndex = sb.input.get(j);
        b.append(context.allWords.image[termIndex]);

        if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) b.append("[S]");
        sep = " ";
      }
      // The next word, if any, starts a new pair.
      sep = "_";
    }

    return b.toString();
  }