コード例 #1
0
 /**
  * Appends a new word item to this WordRelation object.
  *
  * <p>The item is created as a daughter of the token item returned by {@code tokenToWords}, its
  * "name" feature is set to the given word, and it is then appended to the relation.
  *
  * @param word the text to store in the new item's "name" feature
  */
 public void addWord(String word) {
   Item newWord = tokenToWords.getTokenItem().createDaughter();
   newWord.getFeatures().setString("name", word);
   relation.appendItem(newWord);
 }
コード例 #2
0
 /**
  * Marks the last item of the relation with a break by setting its "break" feature to "1".
  *
  * <p>Does nothing when the relation has no tail item.
  */
 public void addBreak() {
   Item tail = (Item) relation.getTail();
   if (tail == null) {
     return;
   }
   tail.getFeatures().setString("break", "1");
 }
コード例 #3
0
ファイル: APFtoXML.java プロジェクト: rgrishman/jet
 /**
  * Generates "mention" annotations (with entity numbers) based on the ACE entities and mentions.
  *
  * <p>Each annotation carries an "entity" feature holding the index of the entity within {@code
  * aceDoc.entities}. When the "types" flag is set, "type" (first three characters of the entity
  * type) and, if present, "subtype" are added. When the "extents" flag is set, an "extent"
  * feature holds the XML-escaped mention text with newlines replaced by spaces.
  *
  * @param doc the document that receives the annotations
  * @param aceDoc the ACE document supplying entities and their mentions
  */
 static void addMentionTags(Document doc, AceDocument aceDoc) {
   ArrayList<AceEntity> entities = aceDoc.entities;
   for (int i = 0; i < entities.size(); i++) {
     AceEntity entity = entities.get(i);
     // Integer.valueOf replaces the deprecated (for removal) Integer constructor.
     Integer entityNumber = Integer.valueOf(i);
     for (AceEntityMention mention : entity.mentions) {
       Span aceSpan = mention.head;
       // skip mentions in ChEnglish APF not aligned to any English text
       if (aceSpan.start() < 0) {
         continue;
       }
       // the Jet span end is one past the ACE span end
       Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1);
       FeatureSet features = new FeatureSet("entity", entityNumber);
       if (flags.contains("types")) {
         features.put("type", entity.type.substring(0, 3));
         if (entity.subtype != null) {
           features.put("subtype", entity.subtype);
         }
       }
       if (flags.contains("extents")) {
         // literal character replacement; no regular expression is needed for a newline
         String cleanExtent = mention.text.replace('\n', ' ');
         features.put("extent", AceEntityMention.addXmlEscapes(cleanExtent));
       }
       doc.annotate("mention", jetSpan, features);
     }
   }
 }
コード例 #4
0
 /**
  * Performs the action, adding the specified Annotation. Returns the position of the end of the
  * Annotation.
  *
  * <p>The span of the new annotation is chosen as follows:
  *
  * <ul>
  *   <li>no span variable: from the pattern application's start position to its best position;
  *   <li>span variable named "0": the empty span at the start position;
  *   <li>otherwise: the Span, or the span of the Annotation, bound to the span variable.
  * </ul>
  *
  * <p>Before the annotation is added, {@code hideAnnotations} is invoked over the span for this
  * action's type and for "token". If a binding variable is set, the new annotation is stored in
  * the bindings under that variable's name.
  *
  * @param doc the document to annotate
  * @param patap the pattern application supplying positions and variable bindings
  * @return the end of the annotated span, or -1 if the span variable is bound to neither a Span
  *     nor an Annotation (a message is printed in that case)
  */
 @Override
 public int perform(Document doc, PatternApplication patap) {
   Span span;
   HashMap bindings = patap.bestBindings;
   // System.out.println ("bindings (for new annotation): " + bindings);
   if (spanVariable == null) {
     span = new Span(patap.startPosition, patap.bestPosition);
   } else if ("0".equals(spanVariable.name.toString())) {
     // Compare by content: reference equality (==) only matches interned strings.
     span = new Span(patap.startPosition, patap.startPosition);
   } else {
     Object value = bindings.get(spanVariable.name);
     if (value instanceof Span) {
       span = (Span) value;
     } else if (value instanceof Annotation) {
       span = ((Annotation) value).span();
     } else {
       // also reached when the variable has no binding (value == null)
       System.out.println("Value of " + spanVariable.toString() + " is not a span.or annotation");
       return -1;
     }
   }
   if (Pat.trace) {
     Console.println(
         "Annotating "
             + doc.text(span)
             + " as "
             + type
             + " "
             + features.substitute(bindings).toSGMLString());
   }
   hideAnnotations(doc, type, span);
   hideAnnotations(doc, "token", span);
   Annotation newAnnotation = new Annotation(type, span, features.substitute(bindings));
   doc.addAnnotation(newAnnotation);
   if (bindingVariable != null) {
     bindings.put(bindingVariable.name, newAnnotation);
   }
   return span.end();
 }
コード例 #5
0
ファイル: APFtoXML.java プロジェクト: rgrishman/jet
 /**
  * Generates a "timex2" annotation for each time expression of the ACE document.
  *
  * <p>The annotation covers the extent of the expression's first mention and carries the "val",
  * "anchor_val", "anchor_dir", "set" and "mod" features for every corresponding field that is
  * neither null nor empty.
  *
  * @param doc the document that receives the annotations
  * @param aceDoc the ACE document supplying the time expressions
  */
 static void addTimexTags(Document doc, AceDocument aceDoc) {
   List<AceTimex> allTimex = aceDoc.timeExpressions;
   for (int t = 0; t < allTimex.size(); t++) {
     AceTimex timex = allTimex.get(t);
     AceTimexMention firstMention = (AceTimexMention) timex.mentions.get(0);
     Span extent = firstMention.extent;
     // the Jet span end is one past the ACE span end
     Span jetSpan = new Span(extent.start(), extent.end() + 1);

     FeatureSet timexFeatures = new FeatureSet();
     if (!(timex.val == null || timex.val.equals(""))) {
       timexFeatures.put("val", timex.val);
     }
     if (!(timex.anchorVal == null || timex.anchorVal.equals(""))) {
       timexFeatures.put("anchor_val", timex.anchorVal);
     }
     if (!(timex.anchorDir == null || timex.anchorDir.equals(""))) {
       timexFeatures.put("anchor_dir", timex.anchorDir);
     }
     if (!(timex.set == null || timex.set.equals(""))) {
       timexFeatures.put("set", timex.set);
     }
     if (!(timex.mod == null || timex.mod.equals(""))) {
       timexFeatures.put("mod", timex.mod);
     }

     doc.annotate("timex2", jetSpan, timexFeatures);
   }
 }
コード例 #6
0
ファイル: APFtoCorefXML.java プロジェクト: rgrishman/jet
 /**
  * Generates "mention" annotations (with entity numbers) based on the ACE entities and mentions.
  *
  * <p>Each annotation carries an "entity" feature holding the index of the entity within {@code
  * aceDoc.entities}. When {@code showTypes} is set, "type" (first three characters of the entity
  * type) and, if present, "subtype" are added as well.
  *
  * @param doc the document that receives the annotations
  * @param aceDoc the ACE document supplying entities and their mentions
  */
 static void addMentionTags(Document doc, AceDocument aceDoc) {
   ArrayList<AceEntity> entities = aceDoc.entities;
   for (int i = 0; i < entities.size(); i++) {
     AceEntity entity = entities.get(i);
     // Integer.valueOf replaces the deprecated (for removal) Integer constructor.
     Integer entityNumber = Integer.valueOf(i);
     for (AceEntityMention mention : entity.mentions) {
       Span aceSpan = mention.head;
       // NOTE(review): a negative head start is not filtered out here; confirm whether
       // mentions without aligned text can reach this method.
       // the Jet span end is one past the ACE span end
       Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1);
       FeatureSet features = new FeatureSet("entity", entityNumber);
       if (showTypes) {
         features.put("type", entity.type.substring(0, 3));
         if (entity.subtype != null) {
           features.put("subtype", entity.subtype);
         }
       }
       doc.annotate("mention", jetSpan, features);
     }
   }
 }
コード例 #7
0
ファイル: Utterance.java プロジェクト: helang818/PolyU_MScST
  /**
   * Sets the token list for this utterance.
   *
   * <p>The list is first recorded as the input text; then a TOKEN relation is created and one
   * item is appended to it for every token whose word is neither null nor empty. Each item
   * receives the features "name", "whitespace", "prepunctuation", "punc", "file_pos" and
   * "line_number" taken from the token. Note that this could be optimized by turning the token
   * list directly into the token relation.
   *
   * @param tokenList the tokenList
   */
  private void setTokenList(List tokenList) {
    setInputText(tokenList);

    Relation tokenRelation = createRelation(Relation.TOKEN);
    for (Object element : tokenList) {
      Token token = (Token) element;
      String word = token.getWord();

      // tokens without a word contribute no item
      if (word == null || word.length() == 0) {
        continue;
      }

      FeatureSet features = tokenRelation.appendItem().getFeatures();
      features.setString("name", word);
      features.setString("whitespace", token.getWhitespace());
      features.setString("prepunctuation", token.getPrepunctuation());
      features.setString("punc", token.getPostpunctuation());
      features.setString("file_pos", String.valueOf(token.getPosition()));
      features.setString("line_number", String.valueOf(token.getLineNumber()));
    }
  }
コード例 #8
0
 /**
  * Sets the last Item in this WordRelation to the given word.
  *
  * <p>The "name" feature of the relation's tail item is overwritten with {@code word}. When the
  * relation has no tail item the call does nothing instead of failing with a
  * NullPointerException.
  *
  * @param word the word to set
  */
 public void setLastWord(String word) {
   Item lastItem = relation.getTail();
   if (lastItem == null) {
     // nothing to rename when there is no tail item
     return;
   }
   FeatureSet featureSet = lastItem.getFeatures();
   featureSet.setString("name", word);
 }
コード例 #9
0
 /**
  * Returns a textual form of this action: "add [" followed by the type and the SGML form of the
  * features and "]", with " over " and the span variable appended when a span variable is set.
  */
 @Override
 public String toString() {
   String description = "add [" + type + features.toSGMLString() + "]";
   if (spanVariable != null) {
     description = description + " over " + spanVariable.toString();
   }
   return description;
 }