/** * Initialise the ANNIE system. This creates a "corpus pipeline" application that can be used to * run sets of documents through the extraction system. */ public void initAnnie() throws GateException, IOException { Out.prln("Initialising ANNIE..."); // load the ANNIE application from the saved state in plugins/ANNIE File pluginsHome = Gate.getPluginsHome(); File anniePlugin = new File(pluginsHome, "ANNIE"); File annieGapp = new File(anniePlugin, "ANNIE_with_defaults.gapp"); annieController = (CorpusController) PersistenceManager.loadObjectFromFile(annieGapp); Out.prln("...ANNIE loaded"); } // initAnnie()
@Override public Resource init() throws ResourceInstantiationException { gracefulExit = false; if (configFileURL == null) { gracefulExit = true; gate.util.Err.println("No configuration file provided!"); } if (japeURL == null) { gracefulExit = true; gate.util.Err.println("No JAPE grammar file provided!"); } // create the init params for the JAPE transducer FeatureMap params = Factory.newFeatureMap(); params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, japeURL); // Code borrowed from Mark Greenwood's Measurements PR if (japeTransducer == null) { // if this is the first time we are running init then actually create a // new transducer as we don't already have one FeatureMap hidden = Factory.newFeatureMap(); Gate.setHiddenAttribute(hidden, true); japeTransducer = (Transducer) Factory.createResource("gate.creole.Transducer", params, hidden); } else { // we are being run through a call to reInit so simply re-init the // underlying JAPE transducer japeTransducer.setParameterValues(params); japeTransducer.reInit(); } ConfigReader config = new ConfigReader(configFileURL); gracefulExit = config.config(); try { HashMap<String, String> options = config.getOptions(); patternMap = new HashMap<String, Pattern>(); addSuffixPattern("disease_suffix", options); addWordPattern("disease_abbrevs", options); addWordPattern("disease_sense", options); addWordExtraPattern("disease_sense_context", options); addPossessiveWordPattern("disease_named_syndrome", options); addWordExtraPattern("disease_generic_context", options); addWordExtraPattern("disease_anatomy_context", options); addSuffixPluralPattern("procedure_suffix", options); addWordPluralPattern("procedure_key", options); addWordExtraPattern("procedure_anatomy_context", options); addWordPluralPattern("symptom_key", options); addWordPattern("test_key", options); addSuffixPattern("anatomy_suffix_adjective", options); addSuffixPattern("anatomy_suffix", options); addPrefixPattern("anatomy_prefix", options); addWordPattern("anatomy_position", options); addWordPluralPattern("anatomy_space_region_junction", options); addWordPattern("anatomy_part_adjective", options); addWordPattern("anatomy_latin_noun", options); addWordPattern("anatomy_muscle", options); addWordPluralPattern("anatomy_part", options); addWordPluralPattern("anatomy_fluid", options); } catch (NullPointerException ne) { gracefulExit = true; gate.util.Err.println( "Missing or unset configuration options. Please check configuration file."); } return this; } // end init()
/** * Run from the command-line, with a list of URLs as argument. * * <p><B>NOTE:</B><br> * This code will run with all the documents in memory - if you want to unload each from memory * after use, add code to store the corpus in a DataStore. */ public static void main(String args[]) throws GateException, IOException { // initialise the GATE library Out.prln("Initialising GATE..."); Gate.init(); Out.prln("...GATE initialised"); // initialise ANNIE (this may take several minutes) StandAloneAnnie annie = new StandAloneAnnie(); annie.initAnnie(); // create a GATE corpus and add a document for each command-line // argument Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus"); for (int i = 0; i < args.length; i++) { URL u = new URL(args[i]); FeatureMap params = Factory.newFeatureMap(); params.put("sourceUrl", u); params.put("preserveOriginalContent", new Boolean(true)); params.put("collectRepositioningInfo", new Boolean(true)); Out.prln("Creating doc for " + u); Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params); corpus.add(doc); } // for each of args // tell the pipeline about the corpus and run it annie.setCorpus(corpus); annie.execute(); // for each document, get an XML document with the // person and location names added Iterator iter = corpus.iterator(); int count = 0; String startTagPart_1 = "<span GateID=\""; String startTagPart_2 = "\" title=\""; String startTagPart_3 = "\" style=\"background:Red;\">"; String endTag = "</span>"; while (iter.hasNext()) { Document doc = (Document) iter.next(); AnnotationSet defaultAnnotSet = doc.getAnnotations(); Set annotTypesRequired = new HashSet(); annotTypesRequired.add("Person"); annotTypesRequired.add("Location"); Set<Annotation> peopleAndPlaces = new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired)); FeatureMap features = doc.getFeatures(); String originalContent = (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME); RepositioningInfo info = (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME); ++count; File file = new File("StANNIE_" + count + ".HTML"); Out.prln("File name: '" + file.getAbsolutePath() + "'"); if (originalContent != null && info != null) { Out.prln("OrigContent and reposInfo existing. Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert anotation tags backward Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionStart = info.getOriginalPos(insertPositionStart); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); insertPositionEnd = info.getOriginalPos(insertPositionEnd, true); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } // if - should generate else if (originalContent != null) { Out.prln("OrigContent existing. Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert anotation tags backward Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } else { Out.prln("Content : " + originalContent); Out.prln("Repositioning: " + info); } String xmlDocument = doc.toXml(peopleAndPlaces, false); String fileName = new String("StANNIE_toXML_" + count + ".HTML"); FileWriter writer = new FileWriter(fileName); writer.write(xmlDocument); writer.close(); } // for each doc } // main