@Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
    Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
    Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);

    for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
      String tokenPOS = ((TokenAnnotation) annotation).getPosTag();

      if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) {
        AnnotationFS entityAnnotation =
            jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());

        entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText());

        String name =
            "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
        if (annotation.getCoveredText().equals("Apache")) name = "ORGANIZATION";
        entityAnnotation.setStringValue(nameFeature, name);

        jcas.addFsToIndexes(entityAnnotation);
      }
    }
  }
Beispiel #2
0
  /** Performs pos-tagging on the given tcas object. */
  @Override
  public synchronized void process(CAS tcas) {

    final AnnotationComboIterator comboIterator =
        new AnnotationComboIterator(tcas, this.sentenceType, this.tokenType);

    for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {

      final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();

      final List<String> sentenceTokenList = new LinkedList<String>();

      for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {

        sentenceTokenAnnotationList.add(tokenAnnotation);

        sentenceTokenList.add(tokenAnnotation.getCoveredText());
      }

      final List<String> posTags = this.posTagger.tag(sentenceTokenList);

      double posProbabilities[] = null;

      if (this.probabilityFeature != null) {
        posProbabilities = this.posTagger.probs();
      }

      final Iterator<String> posTagIterator = posTags.iterator();
      final Iterator<AnnotationFS> sentenceTokenIterator = sentenceTokenAnnotationList.iterator();

      int index = 0;
      while (posTagIterator.hasNext() && sentenceTokenIterator.hasNext()) {
        final String posTag = posTagIterator.next();
        final AnnotationFS tokenAnnotation = sentenceTokenIterator.next();

        tokenAnnotation.setStringValue(this.posFeature, posTag);

        if (posProbabilities != null) {
          tokenAnnotation.setDoubleValue(this.posFeature, posProbabilities[index]);
        }

        index++;
      }

      // log tokens with pos
      if (this.logger.isLoggable(Level.FINER)) {

        final StringBuilder sentenceWithPos = new StringBuilder();

        sentenceWithPos.append("\"");

        for (final Iterator<AnnotationFS> it = sentenceTokenAnnotationList.iterator();
            it.hasNext(); ) {
          final AnnotationFS token = it.next();
          sentenceWithPos.append(token.getCoveredText());
          sentenceWithPos.append('\\');
          sentenceWithPos.append(token.getStringValue(this.posFeature));
          sentenceWithPos.append(' ');
        }

        // delete last whitespace
        if (sentenceWithPos.length() > 1) // not 0 because it contains already the " char
        sentenceWithPos.setLength(sentenceWithPos.length() - 1);

        sentenceWithPos.append("\"");

        this.logger.log(Level.FINER, sentenceWithPos.toString());
      }
    }
  }
Beispiel #3
0
 private void create(JCas cas, Feature feature, int begin, int end, String value) {
   Type type = feature.getDomain();
   AnnotationFS annotation = cas.getCas().createAnnotation(type, begin, end);
   annotation.setStringValue(feature, value);
   cas.addFsToIndexes(annotation);
 }