Пример #1
0
  /**
   * Builds Jet.Tipster.Document object from Penn treebank corpus.
   *
   * @param in
   * @return
   * @throws IOException
   * @throws InvalidFormatException
   */
  public Treebank load(Reader in) throws IOException, InvalidFormatException {

    List<ParseTreeNode> trees = new ArrayList<ParseTreeNode>();
    PushbackReader input = new PushbackReader(in);

    int start = 0;
    while (true) {
      skipWhitespace(input);
      if (lookAhead(input) == -1) {
        break;
      }

      ParseTreeNode tree = readNode(input);
      trees.add(tree);
      determineSpans(tree, start);
      setAnnotations(tree, null);
      start = tree.end;
    }

    String text = buildDocumentString(trees);
    Document doc = new Document(text);
    for (ParseTreeNode tree : trees) {
      doc.annotate("sentence", new Span(tree.start, tree.end), new FeatureSet());
      annotate(doc, tree);
    }

    return new Treebank(doc, trees);
  }
  public void apply(Document doc, List<Object> values, Span span, DateTime ref) {
    Map params = getParameters();
    String value = (String) params.get("value");
    String diff = (String) params.get("diff");
    String dir = (String) params.get("dir");
    DateTime val = ref;
    if (value != null) {
      value = assignValues(value, values);
      val = new DateTime(value);
    } else if (diff != null) {
      diff = assignValues(diff, values);
      Period period = new Period(diff);

      if (dir == null || dir.equals("plus")) {
        val = ref.plus(period);
      } else if (dir.equals("minus")) {
        val = ref.minus(period);
      }
    } else {
      val = ref;
      // use set_xxx
      for (Map.Entry entry : (Set<Map.Entry>) params.entrySet()) {
        Matcher m = Pattern.compile("set_(.*)").matcher((String) entry.getKey());
        if (m.matches()) {
          String field = assignValues((String) entry.getValue(), values);
          String fieldName = m.group(1);

          if (fieldName.equals("month")) {
            int month = Integer.parseInt(field);
            val = getTimeAnnotator().normalizeMonth(val, month);
          } else if (fieldName.equals("day")) {
            int day = Integer.parseInt(field);
            val = val.withField(DateTimeFieldType.dayOfMonth(), day);
          } else {
            throw new InternalError();
          }
        }
      }
    }

    String formattedDate = formatter.print(val);
    FeatureSet attrs = new FeatureSet();
    attrs.put("VAL", formattedDate);

    doc.annotate("TIMEX2", span, attrs);
  }
Пример #3
0
  private void annotate(Document doc, ParseTreeNode node) {
    doc.addAnnotation(node.ann);
    if (node.children != null) {
      Annotation[] children = new Annotation[node.children.length];
      for (int i = 0; i < node.children.length; i++) {
        children[i] = node.children[i].ann;
      }
      node.ann.put("children", children);

      for (ParseTreeNode child : node.children) {
        annotate(doc, child);
      }
    }

    if (node.children == null && isAddingTokens) {
      // TODO: adds `case' property
      doc.annotate("token", node.ann.span(), new FeatureSet());
    }
  }