/** splits document 'doc' into sentences, adding 'sentence' annotations */ static void addSentences(Document doc) { SpecialZoner.findSpecialZones(doc); Vector<Annotation> textSegments = doc.annotationsOfType("TEXT"); if (textSegments == null) { System.out.println("No <TEXT> in document"); return; } for (Annotation ann : textSegments) { Span textSpan = ann.span(); // check document case Ace.monocase = Ace.allLowerCase(doc); // split into sentences SentenceSplitter.split(doc, textSpan); } Vector<Annotation> sentences = doc.annotationsOfType("sentence"); if (sentences != null) { int sentNo = 0; for (Annotation sentence : sentences) { sentNo++; sentence.put("ID", "SENT-" + sentNo); } } doc.removeAnnotationsOfType("dateline"); doc.removeAnnotationsOfType("textBreak"); doc.shrink("sentence"); }
static void writeDoc1(Document doc, PrintStream out) throws IOException { Vector<Annotation> entities = doc.annotationsOfType("entity"); if (entities == null) { System.err.println("No Entity: " + doc); return; } Iterator<Annotation> entityIt = entities.iterator(); int i = 0; while (entityIt.hasNext()) { Annotation entity = entityIt.next(); Vector mentions = (Vector) entity.get("mentions"); Iterator mentionIt = mentions.iterator(); String nameType = (String) entity.get("nameType"); while (mentionIt.hasNext()) { Annotation mention1 = (Annotation) mentionIt.next(); Annotation mention2 = new Annotation("refobj", mention1.span(), new FeatureSet()); mention2.put("objid", Integer.toString(i)); if (nameType != null) { mention2.put("netype", nameType); } doc.addAnnotation(mention2); } i++; } // remove other annotations. String[] annotypes = doc.getAnnotationTypes(); for (i = 0; i < annotypes.length; i++) { String t = annotypes[i]; if (!(t.equals("tagger") || t.equals("refobj") || t.equals("ENAMEX"))) { doc.removeAnnotationsOfType(t); } } writeDocRaw(doc, out); return; }
/**
 * Adds <B>constit</B> annotations to an existing Document <CODE>doc</CODE> to represent the parse
 * tree structure of a set of trees <CODE>trees</CODE>.
 *
 * <p>The annotations of type <CODE>targetAnnotation</CODE> within <CODE>span</CODE> are sorted by
 * span and paired positionally with <CODE>trees</CODE>: the i-th tree is annotated over the span
 * of the i-th target, and the target's 'parse' feature is set to that tree's annotation. If the
 * two counts differ, a warning is printed to standard error and only the first
 * <CODE>min(trees.size(), targets)</CODE> pairs are processed. A document that yields no target
 * annotations is treated as having zero targets.
 *
 * @param trees list of parse trees
 * @param doc document to which annotations should be added
 * @param targetAnnotation name of annotation to determine spans to add parse tree annotations.
 * @param span target span.
 * @param jetCategories if false, use lexical categories from Penn Tree Bank; if true, use
 *     categories from Jet
 */
public void addAnnotations(
    List<ParseTreeNode> trees,
    Document doc,
    String targetAnnotation,
    Span span,
    boolean jetCategories) {
  List<Annotation> targetList = (List<Annotation>) doc.annotationsOfType(targetAnnotation, span);
  // Assumed: like the one-argument overload, this lookup may yield null when nothing matches.
  // Treat that as "no targets" so the mismatch is reported instead of a NullPointerException.
  if (targetList == null) {
    targetList = Collections.emptyList();
  }

  // Order the targets by span so that they line up with the order of the trees.
  Comparator<Annotation> cmp =
      new Comparator<Annotation>() {
        public int compare(Annotation a, Annotation b) {
          return a.span().compareTo(b.span());
        }
      };
  Collections.sort(targetList, cmp);

  if (trees.size() != targetList.size()) {
    System.err.println(
        "PTBReader.addAnnotations: mismatch between number of "
            + targetAnnotation
            + " ("
            + targetList.size()
            + ") and number of trees ("
            + trees.size()
            + ")");
  }

  int n = Math.min(trees.size(), targetList.size());
  for (int i = 0; i < n; i++) {
    ParseTreeNode tree = trees.get(i);
    Annotation target = targetList.get(i);
    addAnnotations(tree, doc, target.span(), jetCategories);
    // tree.ann is read only after the per-tree call above, as in the original ordering
    target.put("parse", tree.ann);
  }
}
/**
 * Parses every 'sentence' annotation of Document 'doc' and collects the resulting dependency
 * relations into a single SyntacticRelationSet.
 *
 * @param doc the document whose sentences are to be parsed
 * @return the set filled by {@code parseSentence} for each sentence, or {@code null} (after
 *     printing a message to standard output) when the document has no sentences or when the
 *     static field {@code fsw} is null, which is reported as "no model loaded"
 */
public static SyntacticRelationSet parseDocument(Document doc) {
  Vector<Annotation> sentenceAnns = doc.annotationsOfType("sentence");
  boolean hasSentences = sentenceAnns != null && !sentenceAnns.isEmpty();
  if (!hasSentences) {
    System.out.println("DepParser: no sentences");
    return null;
  }
  if (fsw == null) {
    System.out.println("DepParser: no model loaded");
    return null;
  }

  SyntacticRelationSet collected = new SyntacticRelationSet();
  for (Annotation sentenceAnn : sentenceAnns) {
    parseSentence(doc, sentenceAnn.span(), collected);
  }
  return collected;
}