/**
 * Writes each token of {@code l} to {@code out} as
 * {@code <before><text>/<answer><after>}, with no separator added between
 * tokens and no trailing newline. The only spacing emitted is whatever the
 * tokens carry in their Before/After annotations.
 *
 * Every annotation value is passed through
 * {@code StringUtils.getNotNullString} before printing (presumably turning a
 * missing annotation into the empty string -- confirm against StringUtils).
 *
 * @param l   the tokens to print, in order
 * @param out the writer receiving the output; it is not flushed or closed here
 */
private static <IN extends CoreMap> void printAnswersAsIsText(List<IN> l, PrintWriter out) {
  for (IN token : l) {
    final String leading = StringUtils.getNotNullString(token.get(BeforeAnnotation.class));
    final String text = StringUtils.getNotNullString(token.get(TextAnnotation.class));
    final String label = StringUtils.getNotNullString(token.get(AnswerAnnotation.class));
    final String trailing = StringUtils.getNotNullString(token.get(AfterAnnotation.class));

    out.print(leading);
    out.print(text);
    out.print('/');
    out.print(label);
    out.print(trailing);
  }
}
private static <IN extends CoreMap> void printAnswersTokenizedText(List<IN> l, PrintWriter out) { for (IN wi : l) { out.print(StringUtils.getNotNullString(wi.get(TextAnnotation.class))); out.print('/'); out.print(StringUtils.getNotNullString(wi.get(AnswerAnnotation.class))); out.print(' '); } out.println(); // put a single newline at the end [added 20091024]. }
private static <IN extends CoreMap> void printAnswersTokenizedXML(List<IN> doc, PrintWriter out) { int num = 0; for (IN wi : doc) { out.print("<wi num=\""); // tag.append(wi.get("position")); out.print(num++); out.print("\" entity=\""); out.print(StringUtils.getNotNullString(wi.get(AnswerAnnotation.class))); out.print("\">"); out.print(XMLUtils.escapeXML(StringUtils.getNotNullString(wi.get(TextAnnotation.class)))); out.println("</wi>"); } }
/**
 * Prints the original text of every token in {@code doc} on one line,
 * separated by single spaces, and wraps each maximal run of consecutive
 * tokens that share a non-background answer label in
 * {@code <LABEL>...</LABEL>}. The background label is
 * {@code flags.backgroundSymbol}; tokens carrying it are printed bare.
 *
 * A closing tag is written directly after the last token of its run, before
 * the separating space; an opening tag is written after the separating space.
 * If {@code doc} is non-empty, a single space follows the final token (and its
 * closing tag, if any). The line is always terminated with a line break, so an
 * empty {@code doc} yields just that line break.
 *
 * @param doc the tokens to print, in order
 * @param out the writer receiving the output; it is not flushed or closed here
 */
private void printAnswersTokenizedInlineXML(List<IN> doc, PrintWriter out) {
  final String background = flags.backgroundSymbol;
  // Label of the previously printed token (background before the first one).
  String openTag = background;
  boolean sawToken = false;

  for (IN token : doc) {
    final String tag = StringUtils.getNotNullString(token.get(AnswerAnnotation.class));
    final boolean labelChanged = !tag.equals(openTag);

    // 1. Close the run that just ended, if it was a labelled one.
    if (labelChanged && !openTag.equals(background)) {
      out.print("</");
      out.print(openTag);
      out.print('>');
    }
    // 2. Separator between tokens (never before the very first token).
    if (sawToken) {
      out.print(' ');
    }
    // 3. Open a new labelled run, if one starts here.
    if (labelChanged && !tag.equals(background)) {
      out.print('<');
      out.print(tag);
      out.print('>');
    }

    out.print(StringUtils.getNotNullString(token.get(CoreAnnotations.OriginalTextAnnotation.class)));
    sawToken = true;
    openTag = tag;
  }

  // After the final token: close a still-open labelled run, then one trailing space.
  if (sawToken) {
    if (!openTag.equals(background)) {
      out.print("</");
      out.print(openTag);
      out.print('>');
    }
    out.print(' ');
  }
  out.println();
}
/**
 * Prints the tokens of {@code doc} using their original text and their
 * Before/After annotation strings, wrapping each maximal run of consecutive
 * tokens that share a non-background answer label in
 * {@code <LABEL>...</LABEL>}. The background label is
 * {@code flags.backgroundSymbol}.
 *
 * For each token the output order is: closing tag of the run that just ended
 * (if any), the token's Before string, opening tag of a run starting here (if
 * any), the token's original text, the closing tag if this is the last token
 * of {@code doc} and it is labelled, and finally the token's After string.
 * No line break is appended.
 *
 * @param doc the tokens to print, in order
 * @param out the writer receiving the output; it is not flushed or closed here
 */
private void printAnswersInlineXML(List<IN> doc, PrintWriter out) {
  final String background = flags.backgroundSymbol;
  // Label of the previously printed token (background before the first one).
  String openTag = background;

  final Iterator<IN> tokens = doc.iterator();
  while (tokens.hasNext()) {
    final IN token = tokens.next();
    final String tag = StringUtils.getNotNullString(token.get(AnswerAnnotation.class));
    final String leadingWs = StringUtils.getNotNullString(token.get(BeforeAnnotation.class));
    final String text = StringUtils.getNotNullString(token.get(CoreAnnotations.OriginalTextAnnotation.class));
    final boolean labelChanged = !tag.equals(openTag);

    // A labelled run ended with the previous token: close it before the Before string.
    if (labelChanged && !openTag.equals(background)) {
      out.print("</");
      out.print(openTag);
      out.print('>');
    }
    out.print(leadingWs);
    // A labelled run starts with this token: open it right before the text.
    if (labelChanged && !tag.equals(background)) {
      out.print('<');
      out.print(tag);
      out.print('>');
    }
    out.print(text);

    final String trailingWs = StringUtils.getNotNullString(token.get(AfterAnnotation.class));
    // The last token has no successor to trigger the close, so close here.
    final boolean closeNow = !tag.equals(background) && !tokens.hasNext();
    if (closeNow) {
      out.print("</");
      out.print(tag);
      out.print('>');
    }
    openTag = closeNow ? background : tag;
    out.print(trailingWs);
  }
}
// todo: give options for document splitting. A line or the whole file or // sentence splitting as now public Iterator<List<IN>> getIterator(Reader r) { Tokenizer<IN> tokenizer = tokenizerFactory.getTokenizer(r); // PTBTokenizer.newPTBTokenizer(r, false, true); List<IN> words = new ArrayList<IN>(); IN previous = tokenFactory.makeToken(); StringBuilder prepend = new StringBuilder(); /* * This changes SGML tags into whitespace -- it should maybe be moved * elsewhere */ while (tokenizer.hasNext()) { IN w = tokenizer.next(); String word = w.get(CoreAnnotations.TextAnnotation.class); Matcher m = sgml.matcher(word); if (m.matches()) { String before = StringUtils.getNotNullString(w.get(CoreAnnotations.BeforeAnnotation.class)); String after = StringUtils.getNotNullString(w.get(CoreAnnotations.AfterAnnotation.class)); prepend.append(before).append(word); String previousTokenAfter = StringUtils.getNotNullString(previous.get(CoreAnnotations.AfterAnnotation.class)); previous.set(AfterAnnotation.class, previousTokenAfter + word + after); // previous.appendAfter(w.word() + w.after()); } else { String before = StringUtils.getNotNullString(w.get(CoreAnnotations.BeforeAnnotation.class)); if (prepend.length() > 0) { w.set(BeforeAnnotation.class, prepend.toString() + before); // w.prependBefore(prepend.toString()); prepend = new StringBuilder(); } words.add(w); previous = w; } } List<List<IN>> sentences = wts.process(words); String after = ""; IN last = null; for (List<IN> sentence : sentences) { int pos = 0; for (IN w : sentence) { w.set(PositionAnnotation.class, Integer.toString(pos)); after = StringUtils.getNotNullString(w.get(CoreAnnotations.AfterAnnotation.class)); w.remove(AfterAnnotation.class); last = w; } } if (last != null) { last.set(AfterAnnotation.class, after); } return sentences.iterator(); }