/**
 * Reads the word of an annotated token.
 *
 * <p>Characters are consumed up to, but not including, the next {@code ')'};
 * that parenthesis is pushed back onto the reader so the caller can still see
 * it. When {@code backslashAsEscapeChar} is set, a backslash is dropped and the
 * character following it is used in its place. Note that this replacement
 * character is still compared against {@code ')'}, so an escaped closing
 * parenthesis also ends the word.
 *
 * <p>If the collected text is a key of {@code TRANSFORM_TABLE}, the mapped
 * value is returned instead of the raw text.
 *
 * @param in reader positioned at the first character of the word
 * @return the word that was read, after the optional table lookup
 * @throws IOException if the underlying reader fails
 * @throws InvalidFormatException if the input ends before a {@code ')'} is found
 */
private String readWord(PushbackReader in) throws IOException, InvalidFormatException {
    final StringBuilder token = new StringBuilder();

    int ch = in.read();
    for (;;) {
        if (backslashAsEscapeChar && ch == '\\') {
            // Skip the backslash itself and continue with the escaped character.
            ch = in.read();
        }
        if (ch == -1) {
            // Ran out of input without seeing the closing parenthesis.
            throw new InvalidFormatException();
        }
        if (ch == ')') {
            break;
        }
        token.append((char) ch);
        ch = in.read();
    }
    // Give the closing parenthesis back to the caller.
    in.unread(ch);

    final String raw = token.toString();
    return TRANSFORM_TABLE.containsKey(raw) ? TRANSFORM_TABLE.get(raw) : raw;
}
/**
 * Reads a tag name, i.e. the text that follows an opening parenthesis.
 *
 * <p>Characters are consumed until the first whitespace character, which is
 * pushed back onto the reader. The name is lower-cased and interned before it
 * is returned. Lower-casing uses {@link java.util.Locale#ROOT} so that the
 * result does not depend on the default locale of the JVM (for example, under
 * a Turkish locale {@code "I"} would otherwise become a dotless {@code "ı"}).
 *
 * @param in reader positioned at the first character of the tag name
 * @return the lower-cased, interned tag name; never empty
 * @throws IOException if the underlying reader fails
 * @throws InvalidFormatException if the input ends before any whitespace is
 *         found, or if the tag name is empty
 */
private String readTagName(PushbackReader in) throws IOException, InvalidFormatException {
    StringBuilder buffer = new StringBuilder();
    int c;
    while (true) {
        c = in.read();
        if (c == -1) {
            // End of input inside a tag name.
            throw new InvalidFormatException();
        } else if (Character.isWhitespace(c)) {
            break;
        }
        buffer.append((char) c);
    }
    // Leave the terminating whitespace for the caller.
    in.unread(c);

    if (buffer.length() == 0) {
        throw new InvalidFormatException();
    }
    // Locale-independent case folding keeps tag names stable across environments.
    return buffer.toString().toLowerCase(java.util.Locale.ROOT).intern();
}
private String buildDocumentString(List<ParseTreeNode> trees) { StringBuilder buffer = new StringBuilder(); for (ParseTreeNode tree : trees) { List<ParseTreeNode> terminals = getTerminalNodes(tree); for (ParseTreeNode terminal : terminals) { if (terminal.word != null) { buffer.append(terminal.word); while (buffer.length() < terminal.end) { buffer.append(' '); } } } // set last character to newline if (buffer.charAt(buffer.length() - 1) == ' ') { buffer.setCharAt(buffer.length() - 1, '\n'); } } return buffer.toString(); }
/**
 * Removes the trailing whitespace character of the buffer and shortens the
 * annotations that ended on it.
 *
 * <p>Nothing happens when the buffer is empty or its last character is not
 * whitespace. Otherwise the list is walked from the back: every annotation
 * whose end equals the buffer length is replaced by a copy whose span ends one
 * position earlier. The walk stops at the first annotation whose end differs.
 * Finally the last character of the buffer is deleted.
 *
 * @param annotations annotations to adjust; matching entries are replaced in place
 * @param buffer text buffer whose trailing whitespace character is removed
 */
private void modifyAnnotationEnd(List<Annotation> annotations, StringBuilder buffer) {
    // Position the cursor behind the last element so the list is read backwards.
    final ListIterator<Annotation> cursor = annotations.listIterator(annotations.size());

    final int length = buffer.length();
    if (length == 0 || !Character.isWhitespace(buffer.charAt(length - 1))) {
        return;
    }

    while (cursor.hasPrevious()) {
        final Annotation current = cursor.previous();
        if (current.end() != length) {
            break;
        }
        // Same start, end moved back by one to exclude the removed character.
        final Span shortened = new Span(current.start(), current.end() - 1);
        cursor.set(new Annotation(current.type(), shortened, current.attributes()));
    }

    buffer.deleteCharAt(length - 1);
}