/**
 * Appends a new word Item to this WordRelation.
 *
 * <p>The Item is created as a daughter of the token Item returned by
 * {@code tokenToWords.getTokenItem()}, its "name" feature is set to the given word, and it is
 * then appended to the underlying relation.
 *
 * @param word the text to store under the new Item's "name" feature
 */
public void addWord(String word) {
  Item newWordItem = tokenToWords.getTokenItem().createDaughter();
  newWordItem.getFeatures().setString("name", word);
  relation.appendItem(newWordItem);
}
/**
 * Flags the tail Item of the relation with a break by setting its "break" feature to "1".
 *
 * <p>Does nothing when the relation has no tail Item.
 */
public void addBreak() {
  Item tail = (Item) relation.getTail();
  if (tail == null) {
    return;
  }
  tail.getFeatures().setString("break", "1");
}
/** generate mention annotations (with entity numbers) based on the ACE entities and mentions. */ static void addMentionTags(Document doc, AceDocument aceDoc) { ArrayList<AceEntity> entities = aceDoc.entities; for (int i = 0; i < entities.size(); i++) { AceEntity entity = entities.get(i); ArrayList<AceEntityMention> mentions = entity.mentions; for (int j = 0; j < mentions.size(); j++) { AceEntityMention mention = (AceEntityMention) mentions.get(j); // we compute a jetSpan not including trailing whitespace Span aceSpan = mention.head; // skip mentions in ChEnglish APF not aligned to any English text if (aceSpan.start() < 0) continue; Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1); FeatureSet features = new FeatureSet("entity", new Integer(i)); if (flags.contains("types")) { features.put("type", entity.type.substring(0, 3)); if (entity.subtype != null) features.put("subtype", entity.subtype); } if (flags.contains("extents")) { String cleanExtent = mention.text.replaceAll("\n", " "); features.put("extent", AceEntityMention.addXmlEscapes(cleanExtent)); } doc.annotate("mention", jetSpan, features); } } }
/** * performs the action, adding the specified Annotation. Returns the position of the end of the * Annotation. */ @Override public int perform(Document doc, PatternApplication patap) { Span span; HashMap bindings = patap.bestBindings; // System.out.println ("bindings (for new annotation): " + bindings); if (spanVariable == null) { span = new Span(patap.startPosition, patap.bestPosition); } else if (spanVariable.name.toString() == "0") { span = new Span(patap.startPosition, patap.startPosition); } else { Object value = bindings.get(spanVariable.name); if (value instanceof Span) { span = (Span) value; } else if (value instanceof Annotation) { span = ((Annotation) value).span(); } else { System.out.println("Value of " + spanVariable.toString() + " is not a span.or annotation"); return -1; } } if (Pat.trace) Console.println( "Annotating " + doc.text(span) + " as " + type + " " + features.substitute(bindings).toSGMLString()); hideAnnotations(doc, type, span); hideAnnotations(doc, "token", span); Annotation newAnnotation = new Annotation(type, span, features.substitute(bindings)); doc.addAnnotation(newAnnotation); if (bindingVariable != null) bindings.put(bindingVariable.name, newAnnotation); return span.end(); }
/**
 * Adds a "timex2" annotation to the document for every time expression of the ACE document.
 *
 * <p>The annotation covers the extent of the expression's first mention. Each of the attributes
 * val, anchorVal, anchorDir, set and mod that is neither null nor the empty string is copied
 * into the annotation's features (as "val", "anchor_val", "anchor_dir", "set" and "mod").
 *
 * @param doc the document that receives the annotations
 * @param aceDoc the ACE document supplying the time expressions
 */
static void addTimexTags(Document doc, AceDocument aceDoc) {
  List<AceTimex> allTimexes = aceDoc.timeExpressions;
  for (AceTimex timex : allTimexes) {
    // Only the first mention of each time expression is used.
    AceTimexMention firstMention = (AceTimexMention) timex.mentions.get(0);
    Span extent = firstMention.extent;
    Span annotationSpan = new Span(extent.start(), extent.end() + 1);
    FeatureSet attributes = new FeatureSet();
    if (!(timex.val == null || timex.val.equals(""))) {
      attributes.put("val", timex.val);
    }
    if (!(timex.anchorVal == null || timex.anchorVal.equals(""))) {
      attributes.put("anchor_val", timex.anchorVal);
    }
    if (!(timex.anchorDir == null || timex.anchorDir.equals(""))) {
      attributes.put("anchor_dir", timex.anchorDir);
    }
    if (!(timex.set == null || timex.set.equals(""))) {
      attributes.put("set", timex.set);
    }
    if (!(timex.mod == null || timex.mod.equals(""))) {
      attributes.put("mod", timex.mod);
    }
    doc.annotate("timex2", annotationSpan, attributes);
  }
}
/** generate mention annotations (with entity numbers) based on the ACE entities and mentions. */ static void addMentionTags(Document doc, AceDocument aceDoc) { ArrayList<AceEntity> entities = aceDoc.entities; for (int i = 0; i < entities.size(); i++) { AceEntity entity = (AceEntity) entities.get(i); ArrayList<AceEntityMention> mentions = entity.mentions; for (int j = 0; j < mentions.size(); j++) { AceEntityMention mention = mentions.get(j); // we compute a jetSpan not including trailing whitespace Span aceSpan = mention.head; Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1); FeatureSet features = new FeatureSet("entity", new Integer(i)); if (showTypes) { features.put("type", entity.type.substring(0, 3)); if (entity.subtype != null) features.put("subtype", entity.subtype); } doc.annotate("mention", jetSpan, features); } } }
/**
 * Installs the given tokens as the token relation of this utterance.
 *
 * <p>The list is first passed to {@code setInputText}; then a {@code Relation.TOKEN} relation
 * is created and one Item is appended to it for every token whose word is neither null nor
 * empty. Each Item records the token's word, whitespace, pre- and post-punctuation, file
 * position and line number as string features. Note that this could be optimized by turning
 * the token list directly into the token relation.
 *
 * @param tokenList the tokens to install; every element is cast to {@code Token}
 */
private void setTokenList(List tokenList) {
  setInputText(tokenList);
  Relation tokenRelation = createRelation(Relation.TOKEN);
  for (Object element : tokenList) {
    Token token = (Token) element;
    String word = token.getWord();
    // Tokens without a word contribute no Item.
    if (word == null || word.length() == 0) {
      continue;
    }
    FeatureSet features = tokenRelation.appendItem().getFeatures();
    features.setString("name", word);
    features.setString("whitespace", token.getWhitespace());
    features.setString("prepunctuation", token.getPrepunctuation());
    features.setString("punc", token.getPostpunctuation());
    features.setString("file_pos", String.valueOf(token.getPosition()));
    features.setString("line_number", String.valueOf(token.getLineNumber()));
  }
}
/**
 * Sets the "name" feature of the last Item in this WordRelation to the given word.
 *
 * <p>Does nothing when the relation has no tail Item, instead of failing with a
 * {@code NullPointerException}.
 *
 * @param word the word to set
 */
public void setLastWord(String word) {
  Item lastItem = relation.getTail();
  if (lastItem == null) {
    // Empty relation: there is no last word to rename.
    return;
  }
  lastItem.getFeatures().setString("name", word);
}
/**
 * Returns a printable form of this action: {@code add [} followed by the annotation type and
 * the SGML rendering of the features, then {@code ]}; when a span variable is set,
 * {@code " over "} and the span variable are appended.
 */
@Override
public String toString() {
  String description = "add [" + type + features.toSGMLString() + "]";
  if (spanVariable != null) {
    description = description + " over " + spanVariable.toString();
  }
  return description;
}