@Override public void process(JCas jcas) throws AnalysisEngineProcessException { Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME); Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE); Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE); for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) { String tokenPOS = ((TokenAnnotation) annotation).getPosTag(); if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) { AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd()); entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText()); String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc. if (annotation.getCoveredText().equals("Apache")) name = "ORGANIZATION"; entityAnnotation.setStringValue(nameFeature, name); jcas.addFsToIndexes(entityAnnotation); } } }
/** Performs pos-tagging on the given tcas object. */ @Override public synchronized void process(CAS tcas) { final AnnotationComboIterator comboIterator = new AnnotationComboIterator(tcas, this.sentenceType, this.tokenType); for (AnnotationIteratorPair annotationIteratorPair : comboIterator) { final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>(); final List<String> sentenceTokenList = new LinkedList<String>(); for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) { sentenceTokenAnnotationList.add(tokenAnnotation); sentenceTokenList.add(tokenAnnotation.getCoveredText()); } final List<String> posTags = this.posTagger.tag(sentenceTokenList); double posProbabilities[] = null; if (this.probabilityFeature != null) { posProbabilities = this.posTagger.probs(); } final Iterator<String> posTagIterator = posTags.iterator(); final Iterator<AnnotationFS> sentenceTokenIterator = sentenceTokenAnnotationList.iterator(); int index = 0; while (posTagIterator.hasNext() && sentenceTokenIterator.hasNext()) { final String posTag = posTagIterator.next(); final AnnotationFS tokenAnnotation = sentenceTokenIterator.next(); tokenAnnotation.setStringValue(this.posFeature, posTag); if (posProbabilities != null) { tokenAnnotation.setDoubleValue(this.posFeature, posProbabilities[index]); } index++; } // log tokens with pos if (this.logger.isLoggable(Level.FINER)) { final StringBuilder sentenceWithPos = new StringBuilder(); sentenceWithPos.append("\""); for (final Iterator<AnnotationFS> it = sentenceTokenAnnotationList.iterator(); it.hasNext(); ) { final AnnotationFS token = it.next(); sentenceWithPos.append(token.getCoveredText()); sentenceWithPos.append('\\'); sentenceWithPos.append(token.getStringValue(this.posFeature)); sentenceWithPos.append(' '); } // delete last whitespace if (sentenceWithPos.length() > 1) // not 0 because it contains already the " char sentenceWithPos.setLength(sentenceWithPos.length() - 1); 
sentenceWithPos.append("\""); this.logger.log(Level.FINER, sentenceWithPos.toString()); } } }
/**
 * Creates an annotation of the feature's domain type over {@code [begin, end)},
 * stores {@code value} in the given feature and adds the annotation to the indexes.
 *
 * @param cas the CAS view in which the annotation is created and indexed
 * @param feature the string feature to set; its domain type is the annotation type
 * @param begin begin offset of the new annotation
 * @param end end offset of the new annotation
 * @param value the string stored in {@code feature}
 */
private void create(JCas cas, Feature feature, int begin, int end, String value) {
  final Type domainType = feature.getDomain();
  final AnnotationFS created = cas.getCas().createAnnotation(domainType, begin, end);

  created.setStringValue(feature, value);
  cas.addFsToIndexes(created);
}