/**
   * abnerNER would analyze words and give confidence
   *
   * @param args sentences to be processed
   * @param arg0 information input
   * @throws AnalysisEngineProcessException
   */
  public static void abnerNER(String[] args, JCas arg0) {

    HashMap<String, Double> ConfMap = new HashMap<String, Double>();

    /** use abner to find gene names from words */
    Tagger t = new Tagger();
    for (int i = 1; i < args.length; ++i) {
      String s = args[i];
      String[][] ents = t.getEntities(s);

      /** use HashMap to store words selected by Abner */
      for (int j = 0; j < ents[0].length; j++) {
        ConfMap.put(ents[0][j], 1.0);
      }
    }

    // TODO Auto-generated method stub
    FSIterator<org.apache.uima.jcas.tcas.Annotation> ite =
        arg0.getAnnotationIndex(NameTag.type).iterator();

    while (ite.hasNext()) {
      /** get the words selected by LingPipe */
      String name = ((NameTag) ite.get()).getText();

      /** set the confidence for words selected by both LingPipe and Abner as 1 */
      if (ConfMap.containsKey(name)) {
        ((NameTag) ite.get()).setConfidenceAbner(1.0);
      } else {
        ((NameTag) ite.get()).setConfidenceAbner(0.0);
      }
      ite.next();
    }
  }
Пример #2
0
 private static String getResult(JCas jcas, String sourceLang, String interLang) {
   FSIterator iter = jcas.getAnnotationIndex(Target.type).iterator();
   String result = "";
   while (iter.isValid()) {
     FeatureStructure fs = iter.get();
     Target transText = (Target) fs;
     result = transText.getContent();
     iter.moveToNext();
   }
   return result;
 }
Пример #3
0
  public static void main(String[] args)
      throws IOException, InvalidXMLException, CASException, ResourceInitializationException,
          SAXException {
    JCas jcas = null;

    // Leer el descriptor del anotador
    XMLParser xmlParser = UIMAFramework.getXMLParser();
    XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml");
    // Crear un AE en base al descriptor
    AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in);
    // Obtener el CAS
    jcas = CasCreationUtils.createCas(tsDesc).getJCas();

    if (jcas != null) {
      // De-serializar la anotacion de un fichero
      FileInputStream inputStream = null;
      inputStream = new FileInputStream("resources/annotation.xmi");
      XmiCasDeserializer.deserialize(inputStream, jcas.getCas());

      // Obtener el texto de la anotacion
      String sofaString = jcas.getDocumentText();
      System.out.println(sofaString);

      // Usar las anotaciones del fichero
      FSIterator it = jcas.getAnnotationIndex(Metric.type).iterator();
      while (it.isValid()) {
        Metric metric = (Metric) it.get();
        Number number = metric.getNumber();
        Unit unit = metric.getUnit();
        Double value =
            (number.getIsDouble())
                ? number.getAbsoluteDoubleValue()
                : Double.valueOf(number.getAbsoluteIntegerValue());
        System.out.println("===================");
        System.out.println("Metric: " + metric.getCoveredText());
        System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier());
        System.out.println("Base unit: " + unit.getBaseUnit());
        it.moveToNext();
      }
    }
  }
  /**
   * CasConsumer would use tags and features to write output file, evaluate and print precision,
   * recall and F-1 measure.
   *
   * @param arg0
   * @throws ResourceProcessException
   */
  @Override
  public void processCas(CAS arg0) throws ResourceProcessException {
    /** convert type of arg0 */
    JCas jcas = null;
    try {
      jcas = arg0.getJCas();
    } catch (CASException e1) {
      // TODO Auto-generated catch block
      e1.printStackTrace();
    }
    // TODO Auto-generated method stub
    FSIterator<Annotation> ite = jcas.getAnnotationIndex(WordTag.type).iterator();

    while (ite.hasNext()) {
      /** collect features */
      String id = ((WordTag) ite.get()).getId();
      int begin = ((WordTag) ite.get()).getBegin0();
      int end = ((WordTag) ite.get()).getEnd0();
      String name = ((WordTag) ite.get()).getName();

      /** organize string for output */
      report.append(id);
      report.append("|");
      report.append(begin);
      report.append(" ");
      report.append(end);
      report.append("|");
      report.append(name);
      report.append("\n");

      /** count the length of output string */
      count++;
      ite.next();
    }

    result = report.toString();
    File sampleOut = new File("src/main/resources/data/sample.out");
    try {
      testRecall = FileUtils.file2String(sampleOut);
    } catch (IOException e1) {
      // TODO Auto-generated catch block
      e1.printStackTrace();
    }

    /** split strings from file into sentences */
    String[] resultSplit = result.split("\n");
    String[] recallSplit = testRecall.split("\n");
    PrecisionRecallCalculator(recallSplit, resultSplit);

    /** write the output file to the project root */
    String path = "hw1-longh.out";
    File dirFile = new File(path);

    /** make sure no conflict */
    if (dirFile.exists()) {
      dirFile.delete();
    }

    try {
      /** write file */
      BufferedWriter bw1 = new BufferedWriter(new FileWriter(path, true));
      bw1.write(report.toString());
      bw1.flush();
      bw1.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Пример #5
0
  /** Copied and modified from {@link org.apache.uima.util.CasToInlineXml} */
  private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false);

    // get document text
    String docText = cas.getDocumentText();
    char[] docCharArray = docText.toCharArray();

    // get iterator over annotations sorted by increasing start position and
    // decreasing end position
    FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator();

    // This is basically a recursive algorithm that has had the recursion
    // removed through the use of an explicit Stack. We iterate over the
    // annotations, and if an annotation contains other annotations, we
    // push the parent annotation on the stack, process the children, and
    // then come back to the parent later.
    List<AnnotationFS> stack = new ArrayList<AnnotationFS>();
    int pos = 0;

    ContentHandler handler = sax2xml.getContentHandler();
    handler.startDocument();
    // write the start tag
    converter.startRootElement(handler);
    // now use null is a placeholder for this artificial Document annotation
    AnnotationFS curAnnot = null;

    while (iterator.isValid()) {
      AnnotationFS nextAnnot = iterator.get();

      if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) {
        // nextAnnot's start point is within the span of curAnnot
        if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) // crossover span check
        {
          // nextAnnot is contained within curAnnot

          // write text between current pos and beginning of nextAnnot
          try {
            handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos);
            pos = nextAnnot.getBegin();
            converter.startAnnotationElement(nextAnnot, handler);

            // push parent annotation on stack
            stack.add(curAnnot);
            // move on to next annotation
            curAnnot = nextAnnot;
          } catch (StringIndexOutOfBoundsException e) {
            System.err.println(
                "Invalid annotation range: "
                    + nextAnnot.getBegin()
                    + ","
                    + nextAnnot.getEnd()
                    + " in document of length "
                    + docText.length());
          }
        }
        iterator.moveToNext();
      } else {
        // nextAnnot begins after curAnnot ends
        // write text between current pos and end of curAnnot
        try {
          handler.characters(docCharArray, pos, curAnnot.getEnd() - pos);
          pos = curAnnot.getEnd();
        } catch (StringIndexOutOfBoundsException e) {
          System.err.println(
              "Invalid annotation range: "
                  + curAnnot.getBegin()
                  + ","
                  + curAnnot.getEnd()
                  + " in document of length "
                  + docText.length());
        }
        converter.endAnnotationElement(curAnnot, handler);

        // pop next containing annotation off stack
        curAnnot = stack.remove(stack.size() - 1);
      }
    }

    // finished writing all start tags, now finish up
    if (curAnnot != null) {
      try {
        handler.characters(docCharArray, pos, curAnnot.getEnd() - pos);
        pos = curAnnot.getEnd();
      } catch (StringIndexOutOfBoundsException e) {
        System.err.println(
            "Invalid annotation range: "
                + curAnnot.getBegin()
                + ","
                + curAnnot.getEnd()
                + "in document of length "
                + docText.length());
      }
      converter.endAnnotationElement(curAnnot, handler);

      while (!stack.isEmpty()) {
        curAnnot = stack.remove(stack.size() - 1); // pop
        if (curAnnot == null) {
          break;
        }
        try {
          handler.characters(docCharArray, pos, curAnnot.getEnd() - pos);
          pos = curAnnot.getEnd();
        } catch (StringIndexOutOfBoundsException e) {
          System.err.println(
              "Invalid annotation range: "
                  + curAnnot.getBegin()
                  + ","
                  + curAnnot.getEnd()
                  + "in document of length "
                  + docText.length());
        }
        converter.endAnnotationElement(curAnnot, handler);
      }
    }

    if (pos < docCharArray.length) {
      handler.characters(docCharArray, pos, docCharArray.length - pos);
    }
    converter.endRootElement(handler);
    handler.endDocument();

    // return XML string
    return new String(byteArrayOutputStream.toByteArray());
  }
Пример #6
0
    /**
     * Called when the processing of a Document is completed. <br>
     * The process status can be looked at and corresponding actions taken.
     *
     * @param aCas CAS corresponding to the completed processing
     * @param aStatus EntityProcessStatus that holds the status of all the events for aEntity
     */
    public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
      if (aStatus != null) {
        if (aStatus.isException()) {
          System.err.println("Error on process CAS call to remote service:");
          List exceptions = aStatus.getExceptions();
          for (int i = 0; i < exceptions.size(); i++) {
            ((Throwable) exceptions.get(i)).printStackTrace();
          }
          if (!ignoreErrors) {
            System.err.println("Terminating Client...");
            stop();
          }
        }
        if (logCas) {
          String ip = "no IP";
          List eList = aStatus.getProcessTrace().getEventsByComponentName("UimaEE", false);
          for (int e = 0; e < eList.size(); e++) {
            ProcessTraceEvent event = (ProcessTraceEvent) eList.get(e);
            if (event.getDescription().equals("Service IP")) {
              ip = event.getResultMessage();
            }
          }
          String casId = ((UimaASProcessStatus) aStatus).getCasReferenceId();
          if (casId != null) {
            long current = System.nanoTime() / 1000000 - mStartTime;
            if (casMap.containsKey(casId)) {
              Object value = casMap.get(casId);
              if (value != null && value instanceof Long) {
                long start = ((Long) value).longValue();
                System.out.println(ip + "\t" + start + "\t" + (current - start));
              }
            }
          }

        } else {
          System.out.print(".");
          if (0 == (entityCount + 1) % 50) {
            System.out.print((entityCount + 1) + " processed\n");
          }
        }
      }

      // if output dir specified, dump CAS to XMI
      if (outputDir != null) {
        // try to retrieve the filename of the input file from the CAS
        File outFile = null;
        Type srcDocInfoType =
            aCas.getTypeSystem().getType("org.apache.uima.examples.SourceDocumentInformation");
        if (srcDocInfoType != null) {
          FSIterator it = aCas.getIndexRepository().getAllIndexedFS(srcDocInfoType);
          if (it.hasNext()) {
            FeatureStructure srcDocInfoFs = it.get();
            Feature uriFeat = srcDocInfoType.getFeatureByBaseName("uri");
            Feature offsetInSourceFeat = srcDocInfoType.getFeatureByBaseName("offsetInSource");
            String uri = srcDocInfoFs.getStringValue(uriFeat);
            int offsetInSource = srcDocInfoFs.getIntValue(offsetInSourceFeat);
            File inFile;
            try {
              inFile = new File(new URL(uri).getPath());
              String outFileName = inFile.getName();
              if (offsetInSource > 0) {
                outFileName += ("_" + offsetInSource);
              }
              outFileName += ".xmi";
              outFile = new File((String) outputDir, outFileName);
            } catch (MalformedURLException e1) {
              // invalid URI, use default processing below
            }
          }
        }
        if (outFile == null) {
          outFile = new File((String) outputDir, "doc" + entityCount);
        }
        try {
          FileOutputStream outStream = new FileOutputStream(outFile);
          try {
            XmiCasSerializer.serialize(aCas, outStream);
          } finally {
            outStream.close();
          }
        } catch (Exception e) {
          System.err.println("Could not save CAS to XMI file");
          e.printStackTrace();
        }
      }

      // update stats
      entityCount++;
      String docText = aCas.getDocumentText();
      if (docText != null) {
        size += docText.length();
      }

      // Called just before sendCas with next CAS from collection reader
    }