/**
 * Prints a summary of the annotations held by a JCas to standard output.
 *
 * <p>The first line reports the total number of entries visited in the default annotation index,
 * together with the identity hash code of the JCas. It is followed by one line per annotation
 * type name with the number of occurrences, ordered by type name.
 *
 * <p>If reading an annotation or its type raises a {@link NullPointerException}, the iterator is
 * advanced explicitly and the occurrence is counted under the canonical class name of the
 * exception instead of a type name.
 *
 * @param jcas the JCas whose annotation index is summarised
 */
public static void showJCas(JCas jcas) {
  // TreeMap keeps the report sorted by type name.
  Map<String, MutableInt> countsByType = new TreeMap<String, MutableInt>();
  FSIterator<Annotation> cursor = jcas.getAnnotationIndex().iterator();
  int seen = 0;

  while (cursor.hasNext()) {
    seen++;
    String typeName = "rien";
    try {
      Annotation current = cursor.next();
      typeName = current.getType().getName();
    } catch (NullPointerException e) {
      // Best effort: step past the offending entry and tally it under the exception name.
      cursor.moveToNext();
      typeName = e.getClass().getCanonicalName();
    }

    MutableInt tally = countsByType.get(typeName);
    if (tally != null) {
      tally.increment();
    } else {
      countsByType.put(typeName, new MutableInt(1));
    }
  }

  System.out.println(
      "Total annotation in JCas (ID: " + System.identityHashCode(jcas) + "): " + seen);
  for (Map.Entry<String, MutableInt> entry : countsByType.entrySet()) {
    System.out.println(entry.getKey() + ": " + entry.getValue());
  }
}
/**
 * Returns the content of the last {@code Target} annotation found in the JCas.
 *
 * <p>Every {@code Target} annotation in the index is visited in iteration order and each one
 * overwrites the result, so only the final one is returned.
 *
 * @param jcas the JCas whose {@code Target} annotations are read
 * @param sourceLang not used by this method
 * @param interLang not used by this method
 * @return the content of the last {@code Target} annotation, or the empty string when the index
 *     holds none
 */
private static String getResult(JCas jcas, String sourceLang, String interLang) {
  String lastContent = "";
  for (FSIterator cursor = jcas.getAnnotationIndex(Target.type).iterator();
      cursor.isValid();
      cursor.moveToNext()) {
    Target target = (Target) cursor.get();
    lastContent = target.getContent();
  }
  return lastContent;
}
public static void main(String[] args) throws IOException, InvalidXMLException, CASException, ResourceInitializationException, SAXException { JCas jcas = null; // Leer el descriptor del anotador XMLParser xmlParser = UIMAFramework.getXMLParser(); XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml"); // Crear un AE en base al descriptor AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in); // Obtener el CAS jcas = CasCreationUtils.createCas(tsDesc).getJCas(); if (jcas != null) { // De-serializar la anotacion de un fichero FileInputStream inputStream = null; inputStream = new FileInputStream("resources/annotation.xmi"); XmiCasDeserializer.deserialize(inputStream, jcas.getCas()); // Obtener el texto de la anotacion String sofaString = jcas.getDocumentText(); System.out.println(sofaString); // Usar las anotaciones del fichero FSIterator it = jcas.getAnnotationIndex(Metric.type).iterator(); while (it.isValid()) { Metric metric = (Metric) it.get(); Number number = metric.getNumber(); Unit unit = metric.getUnit(); Double value = (number.getIsDouble()) ? number.getAbsoluteDoubleValue() : Double.valueOf(number.getAbsoluteIntegerValue()); System.out.println("==================="); System.out.println("Metric: " + metric.getCoveredText()); System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier()); System.out.println("Base unit: " + unit.getBaseUnit()); it.moveToNext(); } } }
/** Copied and modified from {@link org.apache.uima.util.CasToInlineXml} */ private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false); // get document text String docText = cas.getDocumentText(); char[] docCharArray = docText.toCharArray(); // get iterator over annotations sorted by increasing start position and // decreasing end position FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator(); // This is basically a recursive algorithm that has had the recursion // removed through the use of an explicit Stack. We iterate over the // annotations, and if an annotation contains other annotations, we // push the parent annotation on the stack, process the children, and // then come back to the parent later. List<AnnotationFS> stack = new ArrayList<AnnotationFS>(); int pos = 0; ContentHandler handler = sax2xml.getContentHandler(); handler.startDocument(); // write the start tag converter.startRootElement(handler); // now use null is a placeholder for this artificial Document annotation AnnotationFS curAnnot = null; while (iterator.isValid()) { AnnotationFS nextAnnot = iterator.get(); if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) { // nextAnnot's start point is within the span of curAnnot if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) // crossover span check { // nextAnnot is contained within curAnnot // write text between current pos and beginning of nextAnnot try { handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos); pos = nextAnnot.getBegin(); converter.startAnnotationElement(nextAnnot, handler); // push parent annotation on stack stack.add(curAnnot); // move on to next annotation curAnnot = nextAnnot; } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + nextAnnot.getBegin() + 
"," + nextAnnot.getEnd() + " in document of length " + docText.length()); } } iterator.moveToNext(); } else { // nextAnnot begins after curAnnot ends // write text between current pos and end of curAnnot try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + " in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); // pop next containing annotation off stack curAnnot = stack.remove(stack.size() - 1); } } // finished writing all start tags, now finish up if (curAnnot != null) { try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + "in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); while (!stack.isEmpty()) { curAnnot = stack.remove(stack.size() - 1); // pop if (curAnnot == null) { break; } try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + "in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); } } if (pos < docCharArray.length) { handler.characters(docCharArray, pos, docCharArray.length - pos); } converter.endRootElement(handler); handler.endDocument(); // return XML string return new String(byteArrayOutputStream.toByteArray()); }