Exemplo n.º 1
0
 /**
  * Splits document 'doc' into sentences, adding 'sentence' annotations.
  *
  * <p>After locating special zones, the sentence splitter is run over the span of every "TEXT"
  * annotation; the {@code Ace.monocase} flag is recomputed from the document before each split.
  * Each resulting 'sentence' annotation then receives an "ID" feature of the form "SENT-1",
  * "SENT-2", ... following the order in which the document returns them. Finally the 'dateline'
  * and 'textBreak' annotations are removed and {@code doc.shrink("sentence")} is applied.
  *
  * <p>If the document reports no "TEXT" annotations (null), a message is printed to standard
  * output and the method returns without splitting, numbering or cleanup.
  *
  * @param doc the document to be split into sentences
  */
 static void addSentences(Document doc) {
   SpecialZoner.findSpecialZones(doc);

   Vector<Annotation> textZones = doc.annotationsOfType("TEXT");
   if (textZones == null) {
     System.out.println("No <TEXT> in document");
     return;
   }

   for (Annotation zone : textZones) {
     Span zoneSpan = zone.span();
     // record whether the document is monocase before splitting this zone
     Ace.monocase = Ace.allLowerCase(doc);
     // break the zone into sentences
     SentenceSplitter.split(doc, zoneSpan);
   }

   // number the sentences 1, 2, 3, ... in the order the document returns them
   Vector<Annotation> sentences = doc.annotationsOfType("sentence");
   int sentenceCount = (sentences == null) ? 0 : sentences.size();
   for (int k = 0; k < sentenceCount; k++) {
     sentences.get(k).put("ID", "SENT-" + (k + 1));
   }

   // drop the helper annotations that are no longer needed
   doc.removeAnnotationsOfType("dateline");
   doc.removeAnnotationsOfType("textBreak");
   doc.shrink("sentence");
 }
Exemplo n.º 2
0
 /**
  * Rewrites the entity mentions of 'doc' as 'refobj' annotations and writes the document to 'out'.
  *
  * <p>For every mention of every 'entity' annotation, a new 'refobj' annotation with the same span
  * is added to the document. Its "objid" feature is the zero-based position of the entity in the
  * entity list, and its "netype" feature is the entity's "nameType" when that feature is present.
  * Afterwards every annotation type other than "tagger", "refobj" and "ENAMEX" is removed from
  * the document, and the document is handed to {@code writeDocRaw}.
  *
  * <p>If the document has no 'entity' annotations (null), a message is printed to standard error
  * and nothing is written.
  *
  * @param doc the document to convert and write; it is modified by this call
  * @param out the stream passed on to {@code writeDocRaw}
  * @throws IOException declared by this method; presumably propagated from {@code writeDocRaw}
  */
 static void writeDoc1(Document doc, PrintStream out) throws IOException {
   Vector<Annotation> entities = doc.annotationsOfType("entity");
   if (entities == null) {
     System.err.println("No Entity: " + doc);
     return;
   }

   int entityIndex = 0;
   for (Annotation entity : entities) {
     @SuppressWarnings("unchecked") // the "mentions" feature is expected to hold Annotations
     Vector<Annotation> mentions = (Vector<Annotation>) entity.get("mentions");
     Iterator<Annotation> mentionIt = mentions.iterator();
     String nameType = (String) entity.get("nameType");
     String objectId = Integer.toString(entityIndex);
     while (mentionIt.hasNext()) {
       Annotation mention = mentionIt.next();
       Annotation refObj = new Annotation("refobj", mention.span(), new FeatureSet());
       refObj.put("objid", objectId);
       if (nameType != null) {
         refObj.put("netype", nameType);
       }
       doc.addAnnotation(refObj);
     }
     entityIndex++;
   }

   // strip every annotation type except the ones that belong in the output
   for (String type : doc.getAnnotationTypes()) {
     boolean keep = type.equals("tagger") || type.equals("refobj") || type.equals("ENAMEX");
     if (!keep) {
       doc.removeAnnotationsOfType(type);
     }
   }

   writeDocRaw(doc, out);
 }
Exemplo n.º 3
0
  /**
   * Adds <B>constit</B> annotations to an existing Document <CODE>doc</CODE> to represent the parse
   * tree structure of a set of trees <CODE>trees</CODE>.
   *
   * <p>The annotations of type <CODE>targetAnnotation</CODE> within <CODE>span</CODE> are sorted by
   * span and paired with the trees in order: the i-th tree is attached to the i-th annotation and
   * recorded as that annotation's "parse" feature. If the two counts differ, a warning is printed
   * to standard error and only the first <CODE>min(trees, annotations)</CODE> pairs are processed.
   * A document with no matching annotations is treated as having zero targets.
   *
   * @param trees list of parse trees
   * @param doc document to which annotations should be added
   * @param targetAnnotation name of annotation to determine spans to add parse tree annotations.
   * @param span target span.
   * @param jetCategories if false, use lexical categories from Penn Tree Bank; if true, use
   *     categories from Jet
   */
  public void addAnnotations(
      List<ParseTreeNode> trees,
      Document doc,
      String targetAnnotation,
      Span span,
      boolean jetCategories) {
    List<Annotation> targetList = (List<Annotation>) doc.annotationsOfType(targetAnnotation, span);
    if (targetList == null) {
      // annotationsOfType may return null when nothing matches (TODO confirm for this
      // overload); fall back to an empty list so the mismatch is reported below instead of
      // failing with a NullPointerException in sort()/size().
      targetList = Collections.emptyList();
    }

    Comparator<Annotation> cmp =
        new Comparator<Annotation>() {
          public int compare(Annotation a, Annotation b) {
            return a.span().compareTo(b.span());
          }
        };

    // order the targets by span so that they line up with the trees
    Collections.sort(targetList, cmp);
    if (trees.size() != targetList.size()) {
      System.err.println(
          "PTBReader.addAnnotations:  mismatch between number of "
              + targetAnnotation
              + " ("
              + targetList.size()
              + ") and number of trees ("
              + trees.size()
              + ")");
    }
    // on a mismatch, pair up as many trees and targets as are available on both sides
    int n = Math.min(trees.size(), targetList.size());
    for (int i = 0; i < n; i++) {
      ParseTreeNode tree = trees.get(i);
      Annotation target = targetList.get(i);
      addAnnotations(tree, doc, target.span(), jetCategories);
      target.put("parse", tree.ann);
    }
  }
Exemplo n.º 4
0
 /**
  * Parses every sentence in Document 'doc' and collects the resulting dependency relations.
  *
  * <p>Each 'sentence' annotation's span is passed to {@code parseSentence}, which accumulates
  * into a single SyntacticRelationSet.
  *
  * @param doc the document whose 'sentence' annotations are to be parsed
  * @return the SyntacticRelationSet filled in by the per-sentence parses, or null (after printing
  *     a message to standard output) when the document has no sentences or when no model has
  *     been loaded ({@code fsw} is null)
  */
 public static SyntacticRelationSet parseDocument(Document doc) {
   Vector<Annotation> sentences = doc.annotationsOfType("sentence");
   boolean nothingToParse = sentences == null || sentences.isEmpty();
   if (nothingToParse) {
     System.out.println("DepParser:  no sentences");
     return null;
   }

   boolean modelMissing = fsw == null;
   if (modelMissing) {
     System.out.println("DepParser:  no model loaded");
     return null;
   }

   // all sentences contribute to one shared relation set
   SyntacticRelationSet relations = new SyntacticRelationSet();
   for (Annotation sentence : sentences) {
     parseSentence(doc, sentence.span(), relations);
   }
   return relations;
 }