/** * abnerNER would analyze words and give confidence * * @param args sentences to be processed * @param arg0 information input * @throws AnalysisEngineProcessException */ public static void abnerNER(String[] args, JCas arg0) { HashMap<String, Double> ConfMap = new HashMap<String, Double>(); /** use abner to find gene names from words */ Tagger t = new Tagger(); for (int i = 1; i < args.length; ++i) { String s = args[i]; String[][] ents = t.getEntities(s); /** use HashMap to store words selected by Abner */ for (int j = 0; j < ents[0].length; j++) { ConfMap.put(ents[0][j], 1.0); } } // TODO Auto-generated method stub FSIterator<org.apache.uima.jcas.tcas.Annotation> ite = arg0.getAnnotationIndex(NameTag.type).iterator(); while (ite.hasNext()) { /** get the words selected by LingPipe */ String name = ((NameTag) ite.get()).getText(); /** set the confidence for words selected by both LingPipe and Abner as 1 */ if (ConfMap.containsKey(name)) { ((NameTag) ite.get()).setConfidenceAbner(1.0); } else { ((NameTag) ite.get()).setConfidenceAbner(0.0); } ite.next(); } }
/**
 * Returns the content of the last {@code Target} annotation found in the CAS.
 *
 * @param jcas the JCas to read {@code Target} annotations from
 * @param sourceLang not used by this method
 * @param interLang not used by this method
 * @return the content of the last {@code Target} in index order, or the empty string when
 *     the index holds no {@code Target} annotation
 */
private static String getResult(JCas jcas, String sourceLang, String interLang) {
  String latest = "";
  // Each iteration overwrites the previous value, so only the final annotation survives.
  for (FSIterator cursor = jcas.getAnnotationIndex(Target.type).iterator();
      cursor.isValid();
      cursor.moveToNext()) {
    Target target = (Target) cursor.get();
    latest = target.getContent();
  }
  return latest;
}
public static void main(String[] args) throws IOException, InvalidXMLException, CASException, ResourceInitializationException, SAXException { JCas jcas = null; // Leer el descriptor del anotador XMLParser xmlParser = UIMAFramework.getXMLParser(); XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml"); // Crear un AE en base al descriptor AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in); // Obtener el CAS jcas = CasCreationUtils.createCas(tsDesc).getJCas(); if (jcas != null) { // De-serializar la anotacion de un fichero FileInputStream inputStream = null; inputStream = new FileInputStream("resources/annotation.xmi"); XmiCasDeserializer.deserialize(inputStream, jcas.getCas()); // Obtener el texto de la anotacion String sofaString = jcas.getDocumentText(); System.out.println(sofaString); // Usar las anotaciones del fichero FSIterator it = jcas.getAnnotationIndex(Metric.type).iterator(); while (it.isValid()) { Metric metric = (Metric) it.get(); Number number = metric.getNumber(); Unit unit = metric.getUnit(); Double value = (number.getIsDouble()) ? number.getAbsoluteDoubleValue() : Double.valueOf(number.getAbsoluteIntegerValue()); System.out.println("==================="); System.out.println("Metric: " + metric.getCoveredText()); System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier()); System.out.println("Base unit: " + unit.getBaseUnit()); it.moveToNext(); } } }
/** * CasConsumer would use tags and features to write output file, evaluate and print precision, * recall and F-1 measure. * * @param arg0 * @throws ResourceProcessException */ @Override public void processCas(CAS arg0) throws ResourceProcessException { /** convert type of arg0 */ JCas jcas = null; try { jcas = arg0.getJCas(); } catch (CASException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } // TODO Auto-generated method stub FSIterator<Annotation> ite = jcas.getAnnotationIndex(WordTag.type).iterator(); while (ite.hasNext()) { /** collect features */ String id = ((WordTag) ite.get()).getId(); int begin = ((WordTag) ite.get()).getBegin0(); int end = ((WordTag) ite.get()).getEnd0(); String name = ((WordTag) ite.get()).getName(); /** organize string for output */ report.append(id); report.append("|"); report.append(begin); report.append(" "); report.append(end); report.append("|"); report.append(name); report.append("\n"); /** count the length of output string */ count++; ite.next(); } result = report.toString(); File sampleOut = new File("src/main/resources/data/sample.out"); try { testRecall = FileUtils.file2String(sampleOut); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } /** split strings from file into sentences */ String[] resultSplit = result.split("\n"); String[] recallSplit = testRecall.split("\n"); PrecisionRecallCalculator(recallSplit, resultSplit); /** write the output file to the project root */ String path = "hw1-longh.out"; File dirFile = new File(path); /** make sure no conflict */ if (dirFile.exists()) { dirFile.delete(); } try { /** write file */ BufferedWriter bw1 = new BufferedWriter(new FileWriter(path, true)); bw1.write(report.toString()); bw1.flush(); bw1.close(); } catch (IOException e) { e.printStackTrace(); } }
/** Copied and modified from {@link org.apache.uima.util.CasToInlineXml} */ private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false); // get document text String docText = cas.getDocumentText(); char[] docCharArray = docText.toCharArray(); // get iterator over annotations sorted by increasing start position and // decreasing end position FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator(); // This is basically a recursive algorithm that has had the recursion // removed through the use of an explicit Stack. We iterate over the // annotations, and if an annotation contains other annotations, we // push the parent annotation on the stack, process the children, and // then come back to the parent later. List<AnnotationFS> stack = new ArrayList<AnnotationFS>(); int pos = 0; ContentHandler handler = sax2xml.getContentHandler(); handler.startDocument(); // write the start tag converter.startRootElement(handler); // now use null is a placeholder for this artificial Document annotation AnnotationFS curAnnot = null; while (iterator.isValid()) { AnnotationFS nextAnnot = iterator.get(); if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) { // nextAnnot's start point is within the span of curAnnot if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) // crossover span check { // nextAnnot is contained within curAnnot // write text between current pos and beginning of nextAnnot try { handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos); pos = nextAnnot.getBegin(); converter.startAnnotationElement(nextAnnot, handler); // push parent annotation on stack stack.add(curAnnot); // move on to next annotation curAnnot = nextAnnot; } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + nextAnnot.getBegin() + 
"," + nextAnnot.getEnd() + " in document of length " + docText.length()); } } iterator.moveToNext(); } else { // nextAnnot begins after curAnnot ends // write text between current pos and end of curAnnot try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + " in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); // pop next containing annotation off stack curAnnot = stack.remove(stack.size() - 1); } } // finished writing all start tags, now finish up if (curAnnot != null) { try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + "in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); while (!stack.isEmpty()) { curAnnot = stack.remove(stack.size() - 1); // pop if (curAnnot == null) { break; } try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + "in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); } } if (pos < docCharArray.length) { handler.characters(docCharArray, pos, docCharArray.length - pos); } converter.endRootElement(handler); handler.endDocument(); // return XML string return new String(byteArrayOutputStream.toByteArray()); }
/** * Called when the processing of a Document is completed. <br> * The process status can be looked at and corresponding actions taken. * * @param aCas CAS corresponding to the completed processing * @param aStatus EntityProcessStatus that holds the status of all the events for aEntity */ public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) { if (aStatus != null) { if (aStatus.isException()) { System.err.println("Error on process CAS call to remote service:"); List exceptions = aStatus.getExceptions(); for (int i = 0; i < exceptions.size(); i++) { ((Throwable) exceptions.get(i)).printStackTrace(); } if (!ignoreErrors) { System.err.println("Terminating Client..."); stop(); } } if (logCas) { String ip = "no IP"; List eList = aStatus.getProcessTrace().getEventsByComponentName("UimaEE", false); for (int e = 0; e < eList.size(); e++) { ProcessTraceEvent event = (ProcessTraceEvent) eList.get(e); if (event.getDescription().equals("Service IP")) { ip = event.getResultMessage(); } } String casId = ((UimaASProcessStatus) aStatus).getCasReferenceId(); if (casId != null) { long current = System.nanoTime() / 1000000 - mStartTime; if (casMap.containsKey(casId)) { Object value = casMap.get(casId); if (value != null && value instanceof Long) { long start = ((Long) value).longValue(); System.out.println(ip + "\t" + start + "\t" + (current - start)); } } } } else { System.out.print("."); if (0 == (entityCount + 1) % 50) { System.out.print((entityCount + 1) + " processed\n"); } } } // if output dir specified, dump CAS to XMI if (outputDir != null) { // try to retrieve the filename of the input file from the CAS File outFile = null; Type srcDocInfoType = aCas.getTypeSystem().getType("org.apache.uima.examples.SourceDocumentInformation"); if (srcDocInfoType != null) { FSIterator it = aCas.getIndexRepository().getAllIndexedFS(srcDocInfoType); if (it.hasNext()) { FeatureStructure srcDocInfoFs = it.get(); Feature uriFeat = srcDocInfoType.getFeatureByBaseName("uri"); 
Feature offsetInSourceFeat = srcDocInfoType.getFeatureByBaseName("offsetInSource"); String uri = srcDocInfoFs.getStringValue(uriFeat); int offsetInSource = srcDocInfoFs.getIntValue(offsetInSourceFeat); File inFile; try { inFile = new File(new URL(uri).getPath()); String outFileName = inFile.getName(); if (offsetInSource > 0) { outFileName += ("_" + offsetInSource); } outFileName += ".xmi"; outFile = new File((String) outputDir, outFileName); } catch (MalformedURLException e1) { // invalid URI, use default processing below } } } if (outFile == null) { outFile = new File((String) outputDir, "doc" + entityCount); } try { FileOutputStream outStream = new FileOutputStream(outFile); try { XmiCasSerializer.serialize(aCas, outStream); } finally { outStream.close(); } } catch (Exception e) { System.err.println("Could not save CAS to XMI file"); e.printStackTrace(); } } // update stats entityCount++; String docText = aCas.getDocumentText(); if (docText != null) { size += docText.length(); } // Called just before sendCas with next CAS from collection reader }