/** Performs name finding on the given cas object. */
  public final void process(CAS cas) {

    if (isRemoveExistingAnnotations) {
      final AnnotationComboIterator sentenceNameCombo =
          new AnnotationComboIterator(cas, mSentenceType, mNameType);

      List<AnnotationFS> removeAnnotations = new LinkedList<AnnotationFS>();
      for (AnnotationIteratorPair annotationIteratorPair : sentenceNameCombo) {
        for (AnnotationFS nameAnnotation : annotationIteratorPair.getSubIterator()) {
          removeAnnotations.add(nameAnnotation);
        }
      }

      for (AnnotationFS annotation : removeAnnotations) {
        cas.removeFsFromIndexes(annotation);
      }
    }

    final AnnotationComboIterator sentenceTokenCombo =
        new AnnotationComboIterator(cas, mSentenceType, mTokenType);

    for (AnnotationIteratorPair annotationIteratorPair : sentenceTokenCombo) {

      final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();

      final List<String> sentenceTokenList = new LinkedList<String>();

      for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {

        sentenceTokenAnnotationList.add(tokenAnnotation);

        sentenceTokenList.add(tokenAnnotation.getCoveredText());
      }

      Span[] names =
          find(cas, (String[]) sentenceTokenList.toArray(new String[sentenceTokenList.size()]));

      AnnotationFS nameAnnotations[] = new AnnotationFS[names.length];

      for (int i = 0; i < names.length; i++) {

        int startIndex =
            ((AnnotationFS) sentenceTokenAnnotationList.get(names[i].getStart())).getBegin();

        int endIndex =
            ((AnnotationFS) sentenceTokenAnnotationList.get(names[i].getEnd() - 1)).getEnd();

        nameAnnotations[i] = cas.createAnnotation(mNameType, startIndex, endIndex);

        cas.getIndexRepository().addFS(nameAnnotations[i]);
      }

      postProcessAnnotations(names, nameAnnotations);
    }

    documentDone(cas);
  }
Пример #2
0
    /**
     * Called when the processing of a Document is completed. <br>
     * The process status can be looked at and corresponding actions taken.
     *
     * @param aCas CAS corresponding to the completed processing
     * @param aStatus EntityProcessStatus that holds the status of all the events for aEntity
     */
    public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
      if (aStatus != null) {
        if (aStatus.isException()) {
          System.err.println("Error on process CAS call to remote service:");
          List exceptions = aStatus.getExceptions();
          for (int i = 0; i < exceptions.size(); i++) {
            ((Throwable) exceptions.get(i)).printStackTrace();
          }
          if (!ignoreErrors) {
            System.err.println("Terminating Client...");
            stop();
          }
        }
        if (logCas) {
          String ip = "no IP";
          List eList = aStatus.getProcessTrace().getEventsByComponentName("UimaEE", false);
          for (int e = 0; e < eList.size(); e++) {
            ProcessTraceEvent event = (ProcessTraceEvent) eList.get(e);
            if (event.getDescription().equals("Service IP")) {
              ip = event.getResultMessage();
            }
          }
          String casId = ((UimaASProcessStatus) aStatus).getCasReferenceId();
          if (casId != null) {
            long current = System.nanoTime() / 1000000 - mStartTime;
            if (casMap.containsKey(casId)) {
              Object value = casMap.get(casId);
              if (value != null && value instanceof Long) {
                long start = ((Long) value).longValue();
                System.out.println(ip + "\t" + start + "\t" + (current - start));
              }
            }
          }

        } else {
          System.out.print(".");
          if (0 == (entityCount + 1) % 50) {
            System.out.print((entityCount + 1) + " processed\n");
          }
        }
      }

      // if output dir specified, dump CAS to XMI
      if (outputDir != null) {
        // try to retrieve the filename of the input file from the CAS
        File outFile = null;
        Type srcDocInfoType =
            aCas.getTypeSystem().getType("org.apache.uima.examples.SourceDocumentInformation");
        if (srcDocInfoType != null) {
          FSIterator it = aCas.getIndexRepository().getAllIndexedFS(srcDocInfoType);
          if (it.hasNext()) {
            FeatureStructure srcDocInfoFs = it.get();
            Feature uriFeat = srcDocInfoType.getFeatureByBaseName("uri");
            Feature offsetInSourceFeat = srcDocInfoType.getFeatureByBaseName("offsetInSource");
            String uri = srcDocInfoFs.getStringValue(uriFeat);
            int offsetInSource = srcDocInfoFs.getIntValue(offsetInSourceFeat);
            File inFile;
            try {
              inFile = new File(new URL(uri).getPath());
              String outFileName = inFile.getName();
              if (offsetInSource > 0) {
                outFileName += ("_" + offsetInSource);
              }
              outFileName += ".xmi";
              outFile = new File((String) outputDir, outFileName);
            } catch (MalformedURLException e1) {
              // invalid URI, use default processing below
            }
          }
        }
        if (outFile == null) {
          outFile = new File((String) outputDir, "doc" + entityCount);
        }
        try {
          FileOutputStream outStream = new FileOutputStream(outFile);
          try {
            XmiCasSerializer.serialize(aCas, outStream);
          } finally {
            outStream.close();
          }
        } catch (Exception e) {
          System.err.println("Could not save CAS to XMI file");
          e.printStackTrace();
        }
      }

      // update stats
      entityCount++;
      String docText = aCas.getDocumentText();
      if (docText != null) {
        size += docText.length();
      }

      // Called just before sendCas with next CAS from collection reader
    }