public boolean addSortedExclusive(Annotation annot) {
  Annotation currAnot = null;

  // overlapping check
  for (int i = 0; i < size(); ++i) {
    currAnot = (Annotation) get(i);
    if (annot.overlaps(currAnot)) {
      return false;
    } // if
  } // for

  long annotStart = annot.getStartNode().getOffset().longValue();
  long currStart;
  // insert
  for (int i = 0; i < size(); ++i) {
    currAnot = (Annotation) get(i);
    currStart = currAnot.getStartNode().getOffset().longValue();
    if (annotStart < currStart) {
      insertElementAt(annot, i);
      /*
      Out.prln("Insert start: "+annotStart+" at position: "+i+" size="+size());
      Out.prln("Current start: "+currStart);
      */
      return true;
    } // if
  } // for

  int size = size();
  insertElementAt(annot, size);
  // Out.prln("Insert start: "+annotStart+" at size position: "+size);
  return true;
} // addSortedExclusive
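// A minimal usage sketch for addSortedExclusive, assuming a GATE Document
// `doc` that has already been annotated (the "Person" type is an arbitrary
// example): annotations overlapping an already-inserted one are rejected, so
// the resulting list is both offset-sorted and overlap-free.
SortedAnnotationList sorted = new SortedAnnotationList();
Iterator it = doc.getAnnotations().get("Person").iterator();
while (it.hasNext()) {
  Annotation a = (Annotation) it.next();
  if (!sorted.addSortedExclusive(a)) {
    Out.prln("Skipping overlapping annotation " + a.getId());
  }
}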
public SeminarDatabase(String filename) throws FileNotFoundException, IOException {
  this.filename = filename;
  seminars = new HashMap();
  BufferedReader reader = new BufferedReader(new FileReader(filename));
  String line = reader.readLine();
  while (line != null) {
    String[] parts = line.split("\t");
    String id = parts[0];
    int count = Integer.parseInt(parts[1]);
    Vector annotationVector = new Vector();
    for (int i = 0; i < count; i++) {
      String line2 = reader.readLine();
      String[] parts2 = line2.split("\t");
      String type = parts2[0];
      int offset = Integer.parseInt(parts2[1]);
      int length = Integer.parseInt(parts2[2]);
      Annotation annotation = new Annotation(type, offset, length);
      annotation.text = parts2[3];
      annotationVector.add(annotation);
    }
    Annotation[] annotations = (Annotation[]) annotationVector.toArray(new Annotation[0]);
    Seminar seminar = new Seminar(annotations);
    if (seminars.get(id) == null) {
      seminars.put(id, new Vector());
    }
    ((Vector) seminars.get(id)).add(seminar);
    line = reader.readLine();
  }
  reader.close(); // avoid leaking the file handle
}
/** Splits document 'doc' into sentences, adding 'sentence' annotations. */
static void addSentences(Document doc) {
  SpecialZoner.findSpecialZones(doc);
  Vector<Annotation> textSegments = doc.annotationsOfType("TEXT");
  if (textSegments == null) {
    System.out.println("No <TEXT> in document");
    return;
  }
  for (Annotation ann : textSegments) {
    Span textSpan = ann.span();
    // check document case
    Ace.monocase = Ace.allLowerCase(doc);
    // split into sentences
    SentenceSplitter.split(doc, textSpan);
  }
  Vector<Annotation> sentences = doc.annotationsOfType("sentence");
  if (sentences != null) {
    int sentNo = 0;
    for (Annotation sentence : sentences) {
      sentNo++;
      sentence.put("ID", "SENT-" + sentNo);
    }
  }
  doc.removeAnnotationsOfType("dateline");
  doc.removeAnnotationsOfType("textBreak");
  doc.shrink("sentence");
}
private boolean hasParamAnnotation(Method method) {
  Annotation[][] paramAnnotationArrays = method.getParameterAnnotations();
  for (Annotation[] paramAnnotations : paramAnnotationArrays) {
    for (Annotation paramAnnotation : paramAnnotations) {
      if (paramAnnotation.annotationType().isAssignableFrom(Param.class)) {
        return true;
      }
    }
  }
  return false;
}
public static boolean hasAnnotation(Annotation[] anns, Class<? extends Annotation> clazz) {
  for (Annotation ann : anns) {
    if (clazz == ann.annotationType()) {
      return true;
    }
  }
  return false;
}
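// A short usage sketch: @Deprecated is runtime-retained, so reflection can see
// it, and Thread.stop() carries the annotation, so the check below prints true.
public static void main(String[] args) throws NoSuchMethodException {
  java.lang.reflect.Method stop = Thread.class.getMethod("stop");
  System.out.println(hasAnnotation(stop.getAnnotations(), Deprecated.class)); // true
}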
/**
 * Trains the tagger using the DocumentCollection in file 'trainingCollection'.
 * 'trainingCollection' should consist of documents which have been explicitly tagged with
 * part-of-speech information.
 */
void train(String trainingCollection) {
  for (int i = 0; i < posTable.length; i++)
    tagTable[i] = new String[] {"constit", "cat", posTable[i], posTable[i]};
  // build ergodic HMM with one state for each POS (plus start and end states)
  HMMstate startState = new HMMstate("start", "", WordFeatureHMMemitter.class);
  posh.addState(startState);
  for (int j = 0; j < posTable.length; j++) startState.addArc(new HMMarc(posTable[j], 0));
  HMMstate endState = new HMMstate("end", "", WordFeatureHMMemitter.class);
  posh.addState(endState);
  for (int i = 0; i < posTable.length; i++) {
    String pos = posTable[i];
    HMMstate state = new HMMstate(pos, pos, WordFeatureHMMemitter.class);
    posh.addState(state);
    for (int j = 0; j < posTable.length; j++) state.addArc(new HMMarc(posTable[j], 0));
    state.addArc(new HMMarc("end", 0));
  }
  posh.resolveNames();
  posh.resetForTraining();
  annotator = new HMMannotator(posh);
  annotator.setTagTable(tagTable);
  annotator.setBItag(false);

  DocumentCollection col = new DocumentCollection(trainingCollection);
  col.open();
  for (int i = 0; i < col.size(); i++) {
    ExternalDocument doc = col.get(i);
    doc.open();
    System.out.println("Training from " + doc.fileName());
    // divide at endmarks (constit cat="."), adding "S" marks
    int posn = 0;
    int start = posn;
    Vector anns;
    while ((anns = doc.annotationsAt(posn, "constit")) != null) {
      Annotation ann = (Annotation) anns.get(0);
      posn = ann.span().end();
      String pos = (String) ann.get("cat");
      if (pos.equals(".")) {
        doc.annotate("S", new Span(start, posn), new FeatureSet());
        start = posn;
      }
    }
    annotator.train(doc);
    // free up space taken by annotations on document
    doc.clearAnnotations();
  }
  posh.computeProbabilities();
}
/**
 * Hides (adds the 'hidden' feature to) all annotations of type <I>type</I> beginning
 * within span <I>span</I>.
 */
public static void hideAnnotations(Document doc, String type, Span span) {
  for (int posn = span.start(); posn < span.end(); posn++) {
    Vector annotations = doc.annotationsAt(posn, type);
    if (annotations != null) {
      for (int i = 0; i < annotations.size(); i++) {
        Annotation ann = (Annotation) annotations.elementAt(i);
        ann.put("hidden", "true");
        // Console.println ("Hiding " + ann);
      }
    }
  }
}
static void writeDoc1(Document doc, PrintStream out) throws IOException {
  Vector<Annotation> entities = doc.annotationsOfType("entity");
  if (entities == null) {
    System.err.println("No Entity: " + doc);
    return;
  }
  Iterator<Annotation> entityIt = entities.iterator();
  int i = 0;
  while (entityIt.hasNext()) {
    Annotation entity = entityIt.next();
    Vector mentions = (Vector) entity.get("mentions");
    Iterator mentionIt = mentions.iterator();
    String nameType = (String) entity.get("nameType");
    while (mentionIt.hasNext()) {
      Annotation mention1 = (Annotation) mentionIt.next();
      Annotation mention2 = new Annotation("refobj", mention1.span(), new FeatureSet());
      mention2.put("objid", Integer.toString(i));
      if (nameType != null) {
        mention2.put("netype", nameType);
      }
      doc.addAnnotation(mention2);
    }
    i++;
  }
  // remove other annotations.
  String[] annotypes = doc.getAnnotationTypes();
  for (i = 0; i < annotypes.length; i++) {
    String t = annotypes[i];
    if (!(t.equals("tagger") || t.equals("refobj") || t.equals("ENAMEX"))) {
      doc.removeAnnotationsOfType(t);
    }
  }
  writeDocRaw(doc, out);
}
private static Vector<Annotation> parseAnnotations(Node parent) throws XmlParserException {
  Vector<Annotation> annotations = new Vector<Annotation>();
  NodeList nodes = parent.getChildNodes();
  for (int nodeid = 0; nodeid < nodes.getLength(); ++nodeid) {
    Node node = nodes.item(nodeid);
    if (node.getNodeType() != Node.ELEMENT_NODE) continue;

    Element element = (Element) node;
    if (element.getTagName().equals("annotation")) {
      String label = null, value = null, valuetype = null;
      NodeList annotation_nodes = element.getChildNodes();
      for (int annotationid = 0; annotationid < annotation_nodes.getLength(); ++annotationid) {
        Node annotation_node = annotation_nodes.item(annotationid);
        if (annotation_node.getNodeType() != Node.ELEMENT_NODE) continue;

        Element annotation_element = (Element) annotation_node;
        if (annotation_element.getTagName().equals("label"))
          label = annotation_element.getTextContent();
        else if (annotation_element.getTagName().equals("value"))
          value = annotation_element.getTextContent();
        else if (annotation_element.getTagName().equals("valuetype"))
          valuetype = annotation_element.getTextContent();
      }

      if (label == null || value == null || valuetype == null)
        throw new XmlParserException("Annotation is missing either: label, value or valuetype");

      Annotation annotation =
          new Annotation(label, value, Annotation.ValueType.valueOf(valuetype));
      if (annotation.getValueType() == Annotation.ValueType.ONTOLOGY)
        annotation.setOntologyRef(element.getAttribute("ontologyref"));
      // Element.getAttribute returns "" (never null) for a missing attribute,
      // so test presence with hasAttribute rather than a null check.
      if (element.hasAttribute("unit")) annotation.setUnit(element.getAttribute("unit"));
      annotations.add(annotation);
    }
  }
  return annotations;
}
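// For reference, a sketch of an input fragment this parser would accept.
// Element and attribute names are taken from the code above; the concrete
// values are illustrative assumptions, and the name of the enclosing element
// is arbitrary (the parser only inspects the children of the node it is given):
//
//   <annotations>
//     <annotation ontologyref="CHEBI:17234" unit="mM">
//       <label>concentration</label>
//       <value>5.5</value>
//       <valuetype>ONTOLOGY</valuetype>
//     </annotation>
//   </annotations>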
protected Annotation parseAnnotation(Reader input, String fileName) throws IOException {
  BufferedReader in = new BufferedReader(input);
  Annotation ret = new Annotation(fileName);
  String line = "";
  int lineno = 0;
  Matcher matcher = BLAME_PATTERN.matcher(line);
  while ((line = in.readLine()) != null) {
    ++lineno;
    matcher.reset(line);
    if (matcher.find()) {
      String rev = matcher.group(1);
      String author = matcher.group(2).trim();
      ret.addLine(rev, author, true);
    } else {
      OpenGrokLogger.getLogger()
          .log(
              Level.SEVERE,
              "Error: did not find annotation in line {0}: [{1}]",
              new Object[] {String.valueOf(lineno), line});
    }
  }
  return ret;
}
private void runTest(Annotation annotation) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  serializer.write(annotation, out);
  byte[] serialized = out.toByteArray();
  ByteArrayInputStream in = new ByteArrayInputStream(serialized);
  Annotation deserialized = serializer.read(in).first();
  assertEquals(
      annotation.get(CoreAnnotations.SentencesAnnotation.class).size(),
      deserialized.get(CoreAnnotations.SentencesAnnotation.class).size());
  for (int i = 0; i < annotation.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) {
    verifySentence(
        annotation.get(CoreAnnotations.SentencesAnnotation.class).get(i),
        deserialized.get(CoreAnnotations.SentencesAnnotation.class).get(i));
  }
}
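// A sketch of how such a round-trip test might be driven. The pipeline
// configuration here is an assumption, and `serializer` is expected to be
// initialized elsewhere in the test class (e.g. a ProtobufAnnotationSerializer).
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotation ann = new Annotation("The quick brown fox jumps. It lands.");
pipeline.annotate(ann);
runTest(ann); // serialize, deserialize, and compare sentence by sentence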
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  Future<String> json =
      corenlpExecutor.submit(
          () -> {
            try {
              // Get the document
              Properties props = new Properties();
              props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
              Annotation doc = getDocument(props, httpExchange);
              if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                pipeline.annotate(doc);
              }

              // Construct the matcher
              Map<String, String> params = getURLParams(httpExchange.getRequestURI());
              // (get the pattern)
              if (!params.containsKey("pattern")) {
                respondError("Missing required parameter 'pattern'", httpExchange);
                return "";
              }
              String pattern = params.get("pattern");
              // (get whether to filter / find)
              String filterStr = params.getOrDefault("filter", "false");
              final boolean filter =
                  filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.trim());
              // (create the matcher)
              final SemgrexPattern regex = SemgrexPattern.compile(pattern);

              // Run Semgrex
              return JSONOutputter.JSONWriter.objectToJSON(
                  (docWriter) -> {
                    if (filter) {
                      // Case: just filter sentences
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      regex
                                          .matcher(
                                              sentence.get(
                                                  SemanticGraphCoreAnnotations
                                                      .CollapsedCCProcessedDependenciesAnnotation
                                                      .class))
                                          .matches())
                              .collect(Collectors.toList()));
                    } else {
                      // Case: find matches
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      (Consumer<JSONOutputter.Writer>)
                                          (JSONOutputter.Writer sentWriter) -> {
                                            SemgrexMatcher matcher =
                                                regex.matcher(
                                                    sentence.get(
                                                        SemanticGraphCoreAnnotations
                                                            .CollapsedCCProcessedDependenciesAnnotation
                                                            .class));
                                            int i = 0;
                                            while (matcher.find()) {
                                              sentWriter.set(
                                                  Integer.toString(i),
                                                  (Consumer<JSONOutputter.Writer>)
                                                      (JSONOutputter.Writer matchWriter) -> {
                                                        IndexedWord match = matcher.getMatch();
                                                        matchWriter.set("text", match.word());
                                                        matchWriter.set("begin", match.index() - 1);
                                                        matchWriter.set("end", match.index());
                                                        for (String capture :
                                                            matcher.getNodeNames()) {
                                                          matchWriter.set(
                                                              "$" + capture,
                                                              (Consumer<JSONOutputter.Writer>)
                                                                  groupWriter -> {
                                                                    IndexedWord node =
                                                                        matcher.getNode(capture);
                                                                    groupWriter.set(
                                                                        "text", node.word());
                                                                    groupWriter.set(
                                                                        "begin", node.index() - 1);
                                                                    groupWriter.set(
                                                                        "end", node.index());
                                                                  });
                                                        }
                                                      });
                                              i += 1;
                                            }
                                            sentWriter.set("length", i);
                                          }));
                    }
                  });
            } catch (Exception e) {
              e.printStackTrace();
              try {
                respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
              } catch (IOException ignored) {
              }
            }
            return "";
          });

  // Send response
  byte[] response = new byte[0];
  try {
    response = json.get(5, TimeUnit.SECONDS).getBytes();
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    respondError("Timeout when executing Semgrex query", httpExchange);
  }
  if (response.length > 0) {
    httpExchange.getResponseHeaders().add("Content-Type", "text/json");
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  }
}
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  // Get sentence.
  Properties props;
  Annotation ann;
  StanfordCoreNLP.OutputFormat of;
  log("[" + httpExchange.getRemoteAddress() + "] Received message");
  try {
    props = getProperties(httpExchange);
    ann = getDocument(props, httpExchange);
    of =
        StanfordCoreNLP.OutputFormat.valueOf(
            props.getProperty("outputFormat", "json").toUpperCase());
    // Handle direct browser connections (i.e., not a POST request).
    if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) {
      log("[" + httpExchange.getRemoteAddress() + "] Interactive connection");
      staticPageHandle.handle(httpExchange);
      return;
    }
    log("[" + httpExchange.getRemoteAddress() + "] API call");
  } catch (Exception e) {
    // Return error message.
    e.printStackTrace();
    String response = e.getMessage();
    httpExchange.getResponseHeaders().add("Content-Type", "text/plain");
    // Send the byte length, not the string length; the two differ for
    // non-ASCII messages.
    byte[] responseBytes = response.getBytes();
    httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, responseBytes.length);
    httpExchange.getResponseBody().write(responseBytes);
    httpExchange.close();
    return;
  }

  try {
    // Annotate
    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
    Future<Annotation> completedAnnotationFuture =
        corenlpExecutor.submit(
            () -> {
              pipeline.annotate(ann);
              return ann;
            });
    Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS);

    // Get output
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline))
        .accept(completedAnnotation, os);
    os.close();
    byte[] response = os.toByteArray();
    httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of));
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  } catch (TimeoutException e) {
    respondError("CoreNLP request timed out", httpExchange);
  } catch (Exception e) {
    // Return error message.
    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
  }
}
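// A minimal client sketch for this handler. The host, port, and the exact
// properties payload are assumptions; the handler above reads the POST body as
// the document text and takes its configuration from URL parameters.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

public class CoreNLPClient {
  public static void main(String[] args) throws Exception {
    String properties =
        URLEncoder.encode(
            "{\"annotators\":\"tokenize,ssplit,pos\",\"outputFormat\":\"json\"}", "UTF-8");
    URL url = new URL("http://localhost:9000/?properties=" + properties);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setDoOutput(true); // switches the request to POST
    conn.getOutputStream().write("Stanford is in California.".getBytes("UTF-8"));
    try (BufferedReader r =
        new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
      String line;
      while ((line = r.readLine()) != null) {
        System.out.println(line); // JSON annotation of the text
      }
    }
  }
}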
public static void main(String[] args) {
  if (args.length != 2) {
    System.out.println(
        "This program expects two parameters: \n"
            + "1. Input predicted genes XML filename \n"
            + "2. Output GFF filename\n");
  } else {
    String inFileString = args[0];
    String outFileString = args[1];

    File inFile = new File(inFileString);
    File outFile = new File(outFileString);

    try {
      BufferedWriter outBuff = new BufferedWriter(new FileWriter(outFile));

      // writing header first
      outBuff.write(GFF_HEADER + "\n");
      Date currentDate = new Date();
      outBuff.write(DATE_HEADER + currentDate.toString() + "\n");
      outBuff.write(TYPE_HEADER + "\n");

      BufferedReader reader = new BufferedReader(new FileReader(inFile));
      String tempSt;
      StringBuilder stBuilder = new StringBuilder();
      while ((tempSt = reader.readLine()) != null) {
        stBuilder.append(tempSt);
      }
      // closing input file reader
      reader.close();

      Annotation annotation = new Annotation(stBuilder.toString());
      HashMap<String, TreeSet<GffLine>> linesPerContig = new HashMap<String, TreeSet<GffLine>>();

      // -----------POTATIZING GENES----------------
      List<Element> contigsGenes =
          annotation
              .asJDomElement()
              .getChild(PredictedGenes.TAG_NAME)
              .getChildren(ContigXML.TAG_NAME);

      for (Element element : contigsGenes) {
        ContigXML contig = new ContigXML(element);
        TreeSet<GffLine> lines = new TreeSet<GffLine>();
        linesPerContig.put(contig.getId(), lines);

        List<XMLElement> genes = contig.getChildrenWith(PredictedGene.TAG_NAME);
        for (XMLElement xMLElement : genes) {
          PredictedGene gene = new PredictedGene(xMLElement.asJDomElement());

          String geneLine =
              contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + GENE + SEPARATOR;
          int beginPos = gene.getStartPosition();
          int endPos = gene.getEndPosition();
          int initPos = beginPos;
          if (beginPos < endPos) {
            geneLine += beginPos + SEPARATOR + endPos + SEPARATOR;
          } else {
            geneLine += endPos + SEPARATOR + beginPos + SEPARATOR;
            initPos = endPos;
          }
          geneLine +=
              gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "." + SEPARATOR
                  + LOCUS_TAG + gene.getId() + ";\n";
          lines.add(new GffLine(initPos, geneLine));
          // outBuff.write(geneLine);

          String cdsLine =
              contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + CDS + SEPARATOR;
          if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) {
            cdsLine +=
                gene.getStartPosition() + SEPARATOR + (gene.getEndPosition() - 3) + SEPARATOR;
          } else {
            cdsLine +=
                (gene.getEndPosition() - 3) + SEPARATOR + gene.getStartPosition() + SEPARATOR;
          }
          cdsLine += gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR;
          cdsLine +=
              LOCUS_TAG + gene.getId() + ";" + PRODUCT + gene.getProteinNames() + ";"
                  + CHORIZO_INFERENCE + gene.getAccession() + "\n";
          // outBuff.write(cdsLine);
          lines.add(new GffLine(initPos, cdsLine));

          String startCodonLine =
              contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + START_CODON + SEPARATOR;
          if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) {
            startCodonLine +=
                gene.getStartPosition() + SEPARATOR + (gene.getStartPosition() + 2) + SEPARATOR;
          } else {
            startCodonLine +=
                (gene.getStartPosition() - 2) + SEPARATOR + gene.getStartPosition() + SEPARATOR;
          }
          startCodonLine +=
              gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR
                  + LOCUS_TAG + gene.getId() + ";";
          startCodonLine +=
              PRODUCT + gene.getProteinNames() + ";" + CHORIZO_INFERENCE + gene.getAccession()
                  + "\n";
          // outBuff.write(startCodonLine);
          lines.add(new GffLine(initPos, startCodonLine));

          String stopCodonLine =
              contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + STOP_CODON + SEPARATOR;
          if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) {
            stopCodonLine +=
                (gene.getEndPosition() + 1) + SEPARATOR + (gene.getEndPosition() + 3) + SEPARATOR;
          } else {
            stopCodonLine +=
                (gene.getEndPosition() - 3) + SEPARATOR + (gene.getEndPosition() - 1) + SEPARATOR;
          }
          stopCodonLine +=
              gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR
                  + LOCUS_TAG + gene.getId() + ";";
          stopCodonLine +=
              PRODUCT + gene.getProteinNames() + ";" + CHORIZO_INFERENCE + gene.getAccession()
                  + "\n";
          // outBuff.write(stopCodonLine);
          lines.add(new GffLine(initPos, stopCodonLine));
        }
      }

      // -----------POTATIZING RNAS-----------------
      List<Element> contigsRnas =
          annotation
              .asJDomElement()
              .getChild(PredictedRnas.TAG_NAME)
              .getChildren(ContigXML.TAG_NAME);

      for (Element element : contigsRnas) {
        ContigXML contig = new ContigXML(element);
        List<XMLElement> rnas = contig.getChildrenWith(PredictedRna.TAG_NAME);
        TreeSet<GffLine> lines = linesPerContig.get(contig.getId());
        if (lines == null) {
          lines = new TreeSet<GffLine>();
          linesPerContig.put(contig.getId(), lines);
        }

        for (XMLElement xMLElement : rnas) {
          PredictedRna rna = new PredictedRna(xMLElement.asJDomElement());

          String rnaLine =
              contig.getId() + SEPARATOR + CHORIZO_RNA + SEPARATOR + RNA + SEPARATOR;
          int beginPos = rna.getStartPosition();
          int endPos = rna.getEndPosition();
          int initPos = beginPos;
          if (beginPos < endPos) {
            rnaLine += beginPos + SEPARATOR + endPos + SEPARATOR;
          } else {
            rnaLine += endPos + SEPARATOR + beginPos + SEPARATOR;
            initPos = endPos;
          }
          rnaLine +=
              rna.getEvalue() + SEPARATOR + rna.getStrand() + SEPARATOR + "." + SEPARATOR
                  + LOCUS_TAG + rna.getId() + ";";

          String[] columns = rna.getAnnotationUniprotId().split("\\|");
          String rnaProduct = columns[3];
          String refSeqId = columns[1];
          String positions = columns[2].substring(1);

          // ref|NC_007413|:3894075-3895562|16S ribosomal RNA| [locus_tag=Ava_R0035]
          rnaLine +=
              PRODUCT + rnaProduct + "," + "rna:RefSeq:" + refSeqId + " " + positions + "\n";
          // outBuff.write(rnaLine);
          lines.add(new GffLine(initPos, rnaLine));
        }
      }

      Set<String> keys = linesPerContig.keySet();
      for (String key : keys) {
        TreeSet<GffLine> lines = linesPerContig.get(key);
        GffLine line = lines.pollFirst();
        while (line != null) {
          outBuff.write(line.getLine());
          line = lines.pollFirst();
        }
      }

      outBuff.close();
      System.out.println("Done!!! :D");
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
/**
 * Run from the command-line, with a list of URLs as argument.
 *
 * <p><B>NOTE:</B><br>
 * This code will run with all the documents in memory - if you want to unload each from memory
 * after use, add code to store the corpus in a DataStore.
 */
public static void main(String args[]) throws GateException, IOException {
  // initialise the GATE library
  Out.prln("Initialising GATE...");
  Gate.init();
  Out.prln("...GATE initialised");

  // initialise ANNIE (this may take several minutes)
  StandAloneAnnie annie = new StandAloneAnnie();
  annie.initAnnie();

  // create a GATE corpus and add a document for each command-line
  // argument
  Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus");
  for (int i = 0; i < args.length; i++) {
    URL u = new URL(args[i]);
    FeatureMap params = Factory.newFeatureMap();
    params.put("sourceUrl", u);
    params.put("preserveOriginalContent", Boolean.TRUE);
    params.put("collectRepositioningInfo", Boolean.TRUE);
    Out.prln("Creating doc for " + u);
    Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
    corpus.add(doc);
  } // for each of args

  // tell the pipeline about the corpus and run it
  annie.setCorpus(corpus);
  annie.execute();

  // for each document, get an XML document with the
  // person and location names added
  Iterator iter = corpus.iterator();
  int count = 0;
  String startTagPart_1 = "<span GateID=\"";
  String startTagPart_2 = "\" title=\"";
  String startTagPart_3 = "\" style=\"background:Red;\">";
  String endTag = "</span>";

  while (iter.hasNext()) {
    Document doc = (Document) iter.next();
    AnnotationSet defaultAnnotSet = doc.getAnnotations();
    Set annotTypesRequired = new HashSet();
    annotTypesRequired.add("Person");
    annotTypesRequired.add("Location");
    Set<Annotation> peopleAndPlaces =
        new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired));

    FeatureMap features = doc.getFeatures();
    String originalContent =
        (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
    RepositioningInfo info =
        (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);

    ++count;
    File file = new File("StANNIE_" + count + ".HTML");
    Out.prln("File name: '" + file.getAbsolutePath() + "'");
    if (originalContent != null && info != null) {
      Out.prln("OrigContent and reposInfo existing. Generate file...");

      Iterator it = peopleAndPlaces.iterator();
      Annotation currAnnot;
      SortedAnnotationList sortedAnnotations = new SortedAnnotationList();
      while (it.hasNext()) {
        currAnnot = (Annotation) it.next();
        sortedAnnotations.addSortedExclusive(currAnnot);
      } // while

      StringBuffer editableContent = new StringBuffer(originalContent);
      long insertPositionEnd;
      long insertPositionStart;
      // insert annotation tags backward
      Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
      Out.prln("Sorted annotations count: " + sortedAnnotations.size());
      for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
        currAnnot = (Annotation) sortedAnnotations.get(i);
        insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
        insertPositionStart = info.getOriginalPos(insertPositionStart);
        insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
        insertPositionEnd = info.getOriginalPos(insertPositionEnd, true);
        if (insertPositionEnd != -1 && insertPositionStart != -1) {
          editableContent.insert((int) insertPositionEnd, endTag);
          editableContent.insert((int) insertPositionStart, startTagPart_3);
          editableContent.insert((int) insertPositionStart, currAnnot.getType());
          editableContent.insert((int) insertPositionStart, startTagPart_2);
          editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
          editableContent.insert((int) insertPositionStart, startTagPart_1);
        } // if
      } // for

      FileWriter writer = new FileWriter(file);
      writer.write(editableContent.toString());
      writer.close();
    } // if - should generate
    else if (originalContent != null) {
      Out.prln("OrigContent existing. Generate file...");

      Iterator it = peopleAndPlaces.iterator();
      Annotation currAnnot;
      SortedAnnotationList sortedAnnotations = new SortedAnnotationList();
      while (it.hasNext()) {
        currAnnot = (Annotation) it.next();
        sortedAnnotations.addSortedExclusive(currAnnot);
      } // while

      StringBuffer editableContent = new StringBuffer(originalContent);
      long insertPositionEnd;
      long insertPositionStart;
      // insert annotation tags backward
      Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
      Out.prln("Sorted annotations count: " + sortedAnnotations.size());
      for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
        currAnnot = (Annotation) sortedAnnotations.get(i);
        insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
        insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
        if (insertPositionEnd != -1 && insertPositionStart != -1) {
          editableContent.insert((int) insertPositionEnd, endTag);
          editableContent.insert((int) insertPositionStart, startTagPart_3);
          editableContent.insert((int) insertPositionStart, currAnnot.getType());
          editableContent.insert((int) insertPositionStart, startTagPart_2);
          editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
          editableContent.insert((int) insertPositionStart, startTagPart_1);
        } // if
      } // for

      FileWriter writer = new FileWriter(file);
      writer.write(editableContent.toString());
      writer.close();
    } else {
      Out.prln("Content : " + originalContent);
      Out.prln("Repositioning: " + info);
    }

    String xmlDocument = doc.toXml(peopleAndPlaces, false);
    String fileName = "StANNIE_toXML_" + count + ".HTML";
    FileWriter writer = new FileWriter(fileName);
    writer.write(xmlDocument);
    writer.close();
  } // for each doc
} // main