예제 #1
0
 /**
  * Prints a summary of the annotations held in a JCas to standard output.
  *
  * <p>The first line printed is the total number of loop iterations (one per annotation
  * visited) together with the identity hash code of the JCas. It is followed by one line per
  * annotation type name, in the sorted order of the backing {@link TreeMap}, with the number of
  * times that type was seen.
  *
  * @param jcas the JCas whose default annotation index is walked
  */
 public static void showJCas(JCas jcas) {
   Map<String, MutableInt> countsByType = new TreeMap<String, MutableInt>();
   FSIterator<Annotation> cursor = jcas.getAnnotationIndex().iterator();
   int seen = 0;
   while (cursor.hasNext()) {
     seen++;
     // Placeholder only; both the try and the catch path overwrite it.
     String typeName = "rien";
     try {
       Annotation current = cursor.next();
       typeName = current.getType().getName();
     } catch (NullPointerException npe) {
       // NOTE(review): presumably skips an element that could not be read so the loop
       // cannot stall on it - confirm against the iterator implementation in use.
       cursor.moveToNext();
       // The failure is tallied under the exception's class name.
       typeName = npe.getClass().getCanonicalName();
     }
     MutableInt tally = countsByType.get(typeName);
     if (tally != null) {
       tally.increment();
     } else {
       countsByType.put(typeName, new MutableInt(1));
     }
   }
   System.out.println(
       "Total annotation in JCas (ID: " + System.identityHashCode(jcas) + "): " + seen);
   for (Map.Entry<String, MutableInt> entry : countsByType.entrySet()) {
     System.out.println(entry.getKey() + ": " + entry.getValue());
   }
 }
예제 #2
0
 /**
  * Returns the content of the last {@code Target} annotation found in the JCas.
  *
  * <p>Every entry of the {@code Target} annotation index is visited in iteration order and each
  * one overwrites the result, so only the final entry's content is returned.
  *
  * @param jcas the JCas whose {@code Target} annotations are read
  * @param sourceLang not used by this method
  * @param interLang not used by this method
  * @return the content of the last {@code Target} visited, or an empty string when the index
  *     holds none
  */
 private static String getResult(JCas jcas, String sourceLang, String interLang) {
   String lastContent = "";
   FSIterator targets = jcas.getAnnotationIndex(Target.type).iterator();
   for (; targets.isValid(); targets.moveToNext()) {
     Target current = (Target) targets.get();
     lastContent = current.getContent();
   }
   return lastContent;
 }
예제 #3
0
  /**
   * Loads previously serialized annotations from an XMI file and prints every {@code Metric}
   * annotation found in it.
   *
   * <p>A CAS is created from the analysis engine descriptor at
   * {@code desc/ej4/MetricsAnnotatorPipeline.xml}, then populated by deserializing
   * {@code resources/annotation.xmi}. The document text is printed first, followed by the
   * covered text, computed value and base unit of each {@code Metric}.
   *
   * @param args command-line arguments; not used
   * @throws IOException if the descriptor or the XMI file cannot be read or closed
   * @throws InvalidXMLException if the descriptor cannot be parsed
   * @throws CASException if the JCas cannot be obtained from the CAS
   * @throws ResourceInitializationException if the CAS cannot be created from the descriptor
   * @throws SAXException if the XMI content cannot be deserialized
   */
  public static void main(String[] args)
      throws IOException, InvalidXMLException, CASException, ResourceInitializationException,
          SAXException {
    // Read the annotator descriptor.
    XMLParser xmlParser = UIMAFramework.getXMLParser();
    XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml");
    // Build an analysis engine description from the descriptor.
    AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in);
    // Obtain the CAS.
    JCas jcas = CasCreationUtils.createCas(tsDesc).getJCas();
    if (jcas == null) {
      return;
    }

    // Deserialize the annotations from a file. The stream is closed even when
    // deserialization fails, so the file handle is never leaked.
    FileInputStream inputStream = new FileInputStream("resources/annotation.xmi");
    try {
      XmiCasDeserializer.deserialize(inputStream, jcas.getCas());
    } finally {
      inputStream.close();
    }

    // Print the text the annotations refer to.
    String sofaString = jcas.getDocumentText();
    System.out.println(sofaString);

    // Walk the Metric annotations loaded from the file.
    FSIterator it = jcas.getAnnotationIndex(Metric.type).iterator();
    while (it.isValid()) {
      Metric metric = (Metric) it.get();
      Number number = metric.getNumber();
      Unit unit = metric.getUnit();
      // The magnitude is read from the double or the integer feature, depending on the flag.
      Double value =
          (number.getIsDouble())
              ? number.getAbsoluteDoubleValue()
              : Double.valueOf(number.getAbsoluteIntegerValue());
      System.out.println("===================");
      System.out.println("Metric: " + metric.getCoveredText());
      System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier());
      System.out.println("Base unit: " + unit.getBaseUnit());
      it.moveToNext();
    }
  }
예제 #4
0
  /**
   * Renders the document text of a CAS as inline XML, wrapping annotated spans in the elements
   * produced by the given converter.
   *
   * <p>Copied and modified from {@link org.apache.uima.util.CasToInlineXml}.
   *
   * <p>An annotation that starts inside the current annotation but ends after it (a crossover)
   * is skipped, as is an annotation whose start tag cannot be written because its offsets are
   * out of range; the latter is reported on {@code System.err}.
   *
   * @param cas the CAS providing the document text and the annotation index
   * @param converter writes the root element and the start/end element of each annotation
   * @return the serialized XML, decoded as UTF-8
   * @throws SAXException if the content handler or the converter fails
   */
  private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false);

    // get document text
    String docText = cas.getDocumentText();
    char[] docCharArray = docText.toCharArray();

    // get iterator over annotations sorted by increasing start position and
    // decreasing end position
    FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator();

    // This is basically a recursive algorithm that has had the recursion
    // removed through the use of an explicit stack. We iterate over the
    // annotations, and if an annotation contains other annotations, we
    // push the parent annotation on the stack, process the children, and
    // then come back to the parent later.
    // A List is used as the stack because null is pushed as a placeholder.
    List<AnnotationFS> stack = new ArrayList<AnnotationFS>();
    int pos = 0;

    ContentHandler handler = sax2xml.getContentHandler();
    handler.startDocument();
    // write the start tag
    converter.startRootElement(handler);
    // null is the placeholder for the artificial document-level annotation
    AnnotationFS curAnnot = null;

    while (iterator.isValid()) {
      AnnotationFS nextAnnot = iterator.get();

      if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) {
        // nextAnnot's start point is within the span of curAnnot
        if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) { // crossover check
          // nextAnnot is contained within curAnnot:
          // write text between current pos and beginning of nextAnnot
          try {
            handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos);
            pos = nextAnnot.getBegin();
            converter.startAnnotationElement(nextAnnot, handler);

            // push parent annotation on stack and descend into nextAnnot
            stack.add(curAnnot);
            curAnnot = nextAnnot;
          } catch (StringIndexOutOfBoundsException e) {
            System.err.println(invalidRangeMessage(nextAnnot, docCharArray.length));
          }
        }
        iterator.moveToNext();
      } else {
        // nextAnnot begins after curAnnot ends: close curAnnot and return to its
        // parent without advancing, so nextAnnot is re-examined against the parent
        pos = flushAndEndAnnotation(curAnnot, pos, docCharArray, handler, converter);
        curAnnot = stack.remove(stack.size() - 1);
      }
    }

    // finished writing all start tags; close every annotation that is still open,
    // innermost first, stopping at the null placeholder or an empty stack
    while (curAnnot != null) {
      pos = flushAndEndAnnotation(curAnnot, pos, docCharArray, handler, converter);
      curAnnot = stack.isEmpty() ? null : stack.remove(stack.size() - 1);
    }

    // write any text that follows the last annotation
    if (pos < docCharArray.length) {
      handler.characters(docCharArray, pos, docCharArray.length - pos);
    }
    converter.endRootElement(handler);
    handler.endDocument();

    // Decode explicitly as UTF-8 rather than with the platform default charset.
    // NOTE(review): relies on XMLSerializer writing UTF-8 by default - see the UIMA API docs.
    return new String(
        byteArrayOutputStream.toByteArray(), java.nio.charset.Charset.forName("UTF-8"));
  }

  /**
   * Writes the document text from {@code pos} up to the end of {@code annot}, then writes the
   * annotation's end element. Returns the new text position, which is unchanged when the text
   * could not be written because the annotation's range is invalid.
   */
  private static int flushAndEndAnnotation(
      AnnotationFS annot,
      int pos,
      char[] docCharArray,
      ContentHandler handler,
      AnnotationsToElements converter)
      throws SAXException {
    int newPos = pos;
    try {
      handler.characters(docCharArray, pos, annot.getEnd() - pos);
      newPos = annot.getEnd();
    } catch (StringIndexOutOfBoundsException e) {
      System.err.println(invalidRangeMessage(annot, docCharArray.length));
    }
    converter.endAnnotationElement(annot, handler);
    return newPos;
  }

  /** Builds the diagnostic printed when an annotation's offsets fall outside the document. */
  private static String invalidRangeMessage(AnnotationFS annot, int docLength) {
    return "Invalid annotation range: "
        + annot.getBegin()
        + ","
        + annot.getEnd()
        + " in document of length "
        + docLength;
  }