/**
 * Builds a JSON array with one entry per Constituent of the given View -- each entry records
 * the Constituent's surface form plus its start and end character offsets -- and stores that
 * array in the supplied Json object under the requested field name.
 *
 * <p>Caveat: emitting this field may make deserialization to TextAnnotation problematic, as the
 * relevant methods deduce token character offsets directly from the list of token strings and
 * the raw text.
 *
 * @param fieldName name to give to the array field
 * @param view view whose Constituents' character offsets will be serialized
 * @param json Json object that receives the resulting array
 */
private static void writeTokenOffsets(String fieldName, View view, JsonObject json) {
    JsonArray spans = new JsonArray();

    for (Constituent constituent : view.getConstituents()) {
        // One object per constituent; members are written as form, start offset, end offset.
        JsonObject span = new JsonObject();
        writeString(FORM, constituent.getSurfaceForm(), span);
        writeInt(STARTCHAROFFSET, constituent.getStartCharOffset(), span);
        writeInt(ENDCHAROFFSET, constituent.getEndCharOffset(), span);

        spans.add(span);
    }

    json.add(fieldName, spans);
}
/**
 * Renders the text of {@code textAnnotation} with every constituent of {@code view} marked
 * inline as {@code [LABEL tokenized-surface-form ] }.
 *
 * <p>Constituents are processed in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}. The raw text lying between two
 * consecutive constituents is copied through unchanged, and the text following the last
 * constituent is appended as well, so no part of the document outside the constituents is lost.
 * If the view has no constituents the text is returned as is.
 *
 * <p>A constituent that begins before the previously rendered one has ended (a nested or
 * overlapping span) contributes only its bracketed form; no gap text is emitted for it.
 *
 * @return the text with constituent labels embedded
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);

    String text = textAnnotation.getText();
    StringBuilder sb = new StringBuilder();

    // Character offset in text up to which everything has already been rendered.
    int where = 0;
    for (Constituent c : constituents) {
        int start = c.getStartCharOffset();

        // Copy the untouched text preceding this constituent. With overlapping or nested
        // spans start can be smaller than where; there is no gap to copy in that case, and
        // asking for substring(where, start) would throw StringIndexOutOfBoundsException.
        if (start > where) {
            sb.append(text, where, start);
        }

        sb.append("[")
                .append(c.getLabel())
                .append(" ")
                .append(c.getTokenizedSurfaceForm())
                .append(" ] ");

        // Never move backwards: a nested span ends before its enclosing span does.
        where = Math.max(where, c.getEndCharOffset());
    }

    // Emit whatever follows the last constituent (the whole text if there were none).
    if (where < text.length()) {
        sb.append(text, where, text.length());
    }
    return sb.toString();
}