コード例 #1
0
 private static <IN extends CoreMap> void printAnswersAsIsText(List<IN> l, PrintWriter out) {
   for (IN wi : l) {
     out.print(StringUtils.getNotNullString(wi.get(BeforeAnnotation.class)));
     out.print(StringUtils.getNotNullString(wi.get(TextAnnotation.class)));
     out.print('/');
     out.print(StringUtils.getNotNullString(wi.get(AnswerAnnotation.class)));
     out.print(StringUtils.getNotNullString(wi.get(AfterAnnotation.class)));
   }
 }
コード例 #2
0
 private static <IN extends CoreMap> void printAnswersTokenizedText(List<IN> l, PrintWriter out) {
   for (IN wi : l) {
     out.print(StringUtils.getNotNullString(wi.get(TextAnnotation.class)));
     out.print('/');
     out.print(StringUtils.getNotNullString(wi.get(AnswerAnnotation.class)));
     out.print(' ');
   }
   out.println(); // put a single newline at the end [added 20091024].
 }
コード例 #3
0
 private static <IN extends CoreMap> void printAnswersTokenizedXML(List<IN> doc, PrintWriter out) {
   int num = 0;
   for (IN wi : doc) {
     out.print("<wi num=\"");
     // tag.append(wi.get("position"));
     out.print(num++);
     out.print("\" entity=\"");
     out.print(StringUtils.getNotNullString(wi.get(AnswerAnnotation.class)));
     out.print("\">");
     out.print(XMLUtils.escapeXML(StringUtils.getNotNullString(wi.get(TextAnnotation.class))));
     out.println("</wi>");
   }
 }
コード例 #4
0
  private void printAnswersTokenizedInlineXML(List<IN> doc, PrintWriter out) {
    final String background = flags.backgroundSymbol;
    String prevTag = background;
    boolean first = true;
    for (Iterator<IN> wordIter = doc.iterator(); wordIter.hasNext(); ) {
      IN wi = wordIter.next();
      String tag = StringUtils.getNotNullString(wi.get(AnswerAnnotation.class));
      if (!tag.equals(prevTag)) {
        if (!prevTag.equals(background) && !tag.equals(background)) {
          out.print("</");
          out.print(prevTag);
          out.print("> <");
          out.print(tag);
          out.print('>');
        } else if (!prevTag.equals(background)) {
          out.print("</");
          out.print(prevTag);
          out.print("> ");
        } else if (!tag.equals(background)) {
          if (!first) {
            out.print(' ');
          }
          out.print('<');
          out.print(tag);
          out.print('>');
        }
      } else {
        if (!first) {
          out.print(' ');
        }
      }
      first = false;
      out.print(StringUtils.getNotNullString(wi.get(CoreAnnotations.OriginalTextAnnotation.class)));

      if (!wordIter.hasNext()) {
        if (!tag.equals(background)) {
          out.print("</");
          out.print(tag);
          out.print('>');
        }
        out.print(' ');
        prevTag = background;
      } else {
        prevTag = tag;
      }
    }
    out.println();
  }
コード例 #5
0
  private void printAnswersInlineXML(List<IN> doc, PrintWriter out) {
    final String background = flags.backgroundSymbol;
    String prevTag = background;
    for (Iterator<IN> wordIter = doc.iterator(); wordIter.hasNext(); ) {
      IN wi = wordIter.next();
      String tag = StringUtils.getNotNullString(wi.get(AnswerAnnotation.class));

      String before = StringUtils.getNotNullString(wi.get(BeforeAnnotation.class));

      String current =
          StringUtils.getNotNullString(wi.get(CoreAnnotations.OriginalTextAnnotation.class));
      if (!tag.equals(prevTag)) {
        if (!prevTag.equals(background) && !tag.equals(background)) {
          out.print("</");
          out.print(prevTag);
          out.print('>');
          out.print(before);
          out.print('<');
          out.print(tag);
          out.print('>');
        } else if (!prevTag.equals(background)) {
          out.print("</");
          out.print(prevTag);
          out.print('>');
          out.print(before);
        } else if (!tag.equals(background)) {
          out.print(before);
          out.print('<');
          out.print(tag);
          out.print('>');
        }
      } else {
        out.print(before);
      }
      out.print(current);
      String afterWS = StringUtils.getNotNullString(wi.get(AfterAnnotation.class));

      if (!tag.equals(background) && !wordIter.hasNext()) {
        out.print("</");
        out.print(tag);
        out.print('>');
        prevTag = background;
      } else {
        prevTag = tag;
      }
      out.print(afterWS);
    }
  }
コード例 #6
0
  // todo: give options for document splitting. A line or the whole file or
  // sentence splitting as now
  public Iterator<List<IN>> getIterator(Reader r) {
    Tokenizer<IN> tokenizer = tokenizerFactory.getTokenizer(r);
    // PTBTokenizer.newPTBTokenizer(r, false, true);
    List<IN> words = new ArrayList<IN>();
    IN previous = tokenFactory.makeToken();
    StringBuilder prepend = new StringBuilder();

    /*
     * This changes SGML tags into whitespace -- it should maybe be moved
     * elsewhere
     */
    while (tokenizer.hasNext()) {
      IN w = tokenizer.next();
      String word = w.get(CoreAnnotations.TextAnnotation.class);
      Matcher m = sgml.matcher(word);
      if (m.matches()) {

        String before = StringUtils.getNotNullString(w.get(CoreAnnotations.BeforeAnnotation.class));
        String after = StringUtils.getNotNullString(w.get(CoreAnnotations.AfterAnnotation.class));
        prepend.append(before).append(word);
        String previousTokenAfter =
            StringUtils.getNotNullString(previous.get(CoreAnnotations.AfterAnnotation.class));
        previous.set(AfterAnnotation.class, previousTokenAfter + word + after);
        // previous.appendAfter(w.word() + w.after());
      } else {

        String before = StringUtils.getNotNullString(w.get(CoreAnnotations.BeforeAnnotation.class));
        if (prepend.length() > 0) {
          w.set(BeforeAnnotation.class, prepend.toString() + before);
          // w.prependBefore(prepend.toString());
          prepend = new StringBuilder();
        }
        words.add(w);
        previous = w;
      }
    }

    List<List<IN>> sentences = wts.process(words);
    String after = "";
    IN last = null;
    for (List<IN> sentence : sentences) {
      int pos = 0;
      for (IN w : sentence) {
        w.set(PositionAnnotation.class, Integer.toString(pos));
        after = StringUtils.getNotNullString(w.get(CoreAnnotations.AfterAnnotation.class));
        w.remove(AfterAnnotation.class);
        last = w;
      }
    }
    if (last != null) {
      last.set(AfterAnnotation.class, after);
    }

    return sentences.iterator();
  }