/**
 * Walks every annotation of the configured source type, looks its source value up in the mapping
 * and, when a mapped value exists, either writes it onto the annotation itself or hands it to
 * {@code create(...)} together with the annotation's span.
 *
 * <p>The source value is the covered text when no source feature is configured, otherwise the
 * string value of that feature. Annotations without a source value or without a mapping entry are
 * skipped.
 *
 * @param cas the CAS being processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  this.setSource(cas);
  this.setTarget(cas);

  FSIterator<Annotation> cursor = cas.getAnnotationIndex(this.getSourceType()).iterator();
  while (cursor.hasNext()) {
    Annotation current = cursor.next();

    String key =
        this.getSourceFeature() == null
            ? current.getCoveredText()
            : current.getStringValue(this.getSourceFeature());
    if (key == null) {
      continue;
    }

    String mapped = this.getMapping().get(key);
    if (mapped == null) {
      continue;
    }

    if (this.update().booleanValue()) {
      // Update mode: the mapped value goes onto the existing annotation.
      this.update(cas, current, this.getTargetFeature(), mapped);
    } else {
      // Create mode: delegate to create(...) with the same begin/end offsets.
      this.create(cas, this.getTargetFeature(), current.getBegin(), current.getEnd(), mapped);
    }
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Prints an "Exact" row when there is exactly one gold and one system annotation and both
 * share the same begin and end offsets; in every other case prints a "Partial" row listing the
 * texts and offsets of all gold and system annotations.
 */
@Override
public void onMatching(
    String docUri, Type type, SortedSet<Annotation> goldAnnos, SortedSet<Annotation> sysAnnos) {
  boolean oneToOne = goldAnnos.size() == 1 && sysAnnos.size() == 1;
  if (oneToOne) {
    Annotation gold = goldAnnos.first();
    Annotation sys = sysAnnos.first();
    boolean sameSpan = gold.getBegin() == sys.getBegin() && gold.getEnd() == sys.getEnd();
    if (sameSpan) {
      printRow(
          type.getShortName(),
          "Exact",
          gold.getCoveredText(),
          String.valueOf(gold.getBegin()),
          sys.getCoveredText(),
          String.valueOf(sys.getBegin()),
          docUri);
      return;
    }
  }

  // Anything that is not a single identical-span pair is reported as a partial match.
  String typeName = type.getShortName();
  String goldTexts = Joiner.on(" /// ").join(transform(goldAnnos, annoToTxt));
  String goldOffsets = Joiner.on(", ").join(transform(goldAnnos, annoToOffset));
  String sysTexts = Joiner.on(" /// ").join(transform(sysAnnos, annoToTxt));
  String sysOffsets = Joiner.on(", ").join(transform(sysAnnos, annoToOffset));
  printRow(typeName, "Partial", goldTexts, goldOffsets, sysTexts, sysOffsets, docUri);
}
/**
 * Prints to standard output the total number of entries returned by the default annotation index
 * of the given JCas, followed by one line per annotation type name with its count (sorted by
 * type name).
 *
 * <p>If reading an annotation or its type name raises a {@link NullPointerException}, the
 * iterator is advanced and the entry is counted under the exception's canonical class name.
 *
 * @param jcas the JCas whose annotations are counted
 */
public static void showJCas(JCas jcas) {
  FSIterator<Annotation> cursor = jcas.getAnnotationIndex().iterator();
  Map<String, MutableInt> countsByType = new TreeMap<String, MutableInt>();
  int total = 0;

  while (cursor.hasNext()) {
    total += 1;
    String typeName = "rien";
    try {
      Annotation current = cursor.next();
      typeName = current.getType().getName();
    } catch (NullPointerException e) {
      cursor.moveToNext();
      typeName = e.getClass().getCanonicalName();
    }

    MutableInt counter = countsByType.get(typeName);
    if (counter == null) {
      countsByType.put(typeName, new MutableInt(1));
    } else {
      counter.increment();
    }
  }

  System.out.println(
      "Total annotation in JCas (ID: " + System.identityHashCode(jcas) + "): " + total);
  for (Map.Entry<String, MutableInt> entry : countsByType.entrySet()) {
    System.out.println(entry.getKey() + ": " + entry.getValue());
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Prints a "Spurious" row: the gold columns are left {@code null}, the system columns carry
 * the covered text and begin offset of {@code sysAnno}.
 */
@Override
public void onSpurious(String docUri, Type type, Annotation sysAnno) {
  String typeName = type.getShortName();
  String sysText = sysAnno.getCoveredText();
  String sysBegin = String.valueOf(sysAnno.getBegin());
  printRow(typeName, "Spurious", null, null, sysText, sysBegin, docUri);
}
/**
 * {@inheritDoc}
 *
 * <p>Prints a "Missing" row: the gold columns carry the covered text and begin offset of {@code
 * goldAnno}, the system columns are left {@code null}.
 */
@Override
public void onMissing(String docUri, Type type, Annotation goldAnno) {
  String typeName = type.getShortName();
  String goldText = goldAnno.getCoveredText();
  String goldBegin = String.valueOf(goldAnno.getBegin());
  printRow(typeName, "Missing", goldText, goldBegin, null, null, docUri);
}
/**
 * Runs parenthetical detection either over the whole document text (when no window class is
 * configured) or separately over the covered text of each window annotation.
 *
 * @param jCas the JCas being processed
 * @throws AnalysisEngineProcessException propagated from {@code createParentheticals}
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  if (windowClass == null) {
    // No window configured: scan the complete document, offsets start at 0.
    createParentheticals(jCas, jCas.getDocumentText(), 0);
    return;
  }

  for (Annotation window : JCasUtil.select(jCas, windowClass)) {
    // Offsets found inside the window text are shifted by the window's begin.
    createParentheticals(jCas, window.getCoveredText(), window.getBegin());
  }
}
@Override public void process(final JCas jCas) throws AnalysisEngineProcessException { beforeRuleProcessing(jCas); if (!this.labelFeature.equals(NO_SET_LABEL)) { // Must set labels FSIterator<Annotation> it = jCas.getAnnotationIndex(getIteratedType(jCas)).iterator(); Feature feat = this.getIteratedType(jCas).getFeatureByBaseName(this.labelFeature); while (it.hasNext()) { Annotation word = (Annotation) it.next(); word.setStringValue(feat, this.resource.getMatchingLabelString(word)); } } RecognitionHandler recognitionHandler = new RecognitionHandler() { @Override public void recognizedEpisode(RegexOccurrence episode) { ruleMatched(jCas, episode); } }; for (final Rule rule : this.resource.getRules()) { rule.getAutomaton().setAllowOverlappingInstances(this.allowOverlappingOccurrences); rule.getAutomaton().addRecognitionHandler(recognitionHandler); rule.getAutomaton().reset(); } FSIterator<Annotation> it = jCas.getAnnotationIndex(getIteratedType(jCas)).iterator(); while (it.hasNext()) { Annotation word = (Annotation) it.next(); boolean allRulesFailed = true; for (Rule rule : this.resource.getRules()) { rule.getAutomaton().nextAnnotation(word); allRulesFailed &= rule.getAutomaton().currentInstancesNum() == 0; } if (allRulesFailed) allRulesFailed(jCas); } for (Rule rule : this.resource.getRules()) rule.getAutomaton().finish(); for (final Rule rule : this.resource.getRules()) rule.getAutomaton().removeRecognitionHandler(recognitionHandler); afterRuleProcessing(jCas); }
/**
 * Scans {@code text} for matching left/right delimiter pairs and indexes one annotation per pair.
 *
 * <p>Positions of left delimiters are kept on a stack; every right delimiter that has a pending
 * left delimiter closes the most recent one. The created annotation spans from the left delimiter
 * up to and including the right delimiter, shifted by {@code offset}. Right delimiters with no
 * pending left delimiter are ignored.
 *
 * @param jCas the JCas the annotations are created in
 * @param text the text to scan
 * @param offset value added to positions in {@code text} to obtain annotation offsets
 * @throws AnalysisEngineProcessException if the annotation constructor invocation fails
 */
private void createParentheticals(JCas jCas, String text, int offset)
    throws AnalysisEngineProcessException {
  Stack<Integer> pendingLefts = new Stack<Integer>();
  for (int pos = 0; pos < text.length(); pos++) {
    char current = text.charAt(pos);

    if (current == leftParen) {
      pendingLefts.push(pos);
    }

    // Deliberately not an else-branch: both checks are evaluated for every character.
    if (current == rightParen && !pendingLefts.isEmpty()) {
      int openedAt = pendingLefts.pop();
      Annotation parenthetical;
      try {
        parenthetical =
            parentheticalConstructor.newInstance(jCas, offset + openedAt, offset + pos + 1);
      } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
      }
      parenthetical.addToIndexes();
    }
  }
}
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME); Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE); Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE); for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) { String tokenPOS = ((TokenAnnotation) annotation).getPosTag(); if (NP.equals(tokenPOS) || NPS.equals(tokenPOS)) { AnnotationFS entityAnnotation = jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd()); entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText()); String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc. if (annotation.getCoveredText().equals("Apache")) name = "ORGANIZATION"; entityAnnotation.setStringValue(nameFeature, name); jcas.addFsToIndexes(entityAnnotation); } } }
/**
 * Outputs trigrams for an input annotation.
 *
 * <p>Iterates the tokens returned by the token index's subiterator for {@code annotation} while
 * keeping a sliding window of the last three tokens. Whenever the two older tokens of the window
 * pass {@code tokenIsInAnnotation}, an {@code NGram} spanning from the oldest token's begin to
 * the newest token's end is created and added to the indexes.
 *
 * @param annotation the annotation whose tokens are grouped into trigrams
 * @param aJCas the JCas the n-grams are created in
 */
public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {
  AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);
  Iterator<Annotation> cursor = tokenIndex.subiterator(annotation);

  Annotation first = null;
  Annotation second = null;
  Annotation third = null;
  while (cursor.hasNext()) {
    // Slide the three-token window forward by one token.
    first = second;
    second = third;
    third = cursor.next();

    if (!tokenIsInAnnotation(annotation, second) || !tokenIsInAnnotation(annotation, first)) {
      continue;
    }

    NGram trigram = new NGram(aJCas);
    FSArray elements = new FSArray(aJCas, 3);
    elements.set(0, first);
    elements.set(1, second);
    elements.set(2, third);

    trigram.setBegin(first.getBegin());
    trigram.setEnd(third.getEnd());
    trigram.setElements(elements);
    trigram.setElementType("edu.cmu.deiis.types.Token");
    trigram.setConfidence(1.0);
    trigram.setCasProcessorId(PROCESSOR_ID);
    trigram.addToIndexes();
  }
}
/** Removes every annotation returned by {@code getAnnotations()} from the indexes. */
private void remove() {
  for (Annotation toRemove : this.getAnnotations()) {
    toRemove.removeFromIndexes();
  }
}
/**
 * Shifts the span of the given annotation by adding {@code offset} to both its begin and its end.
 * A {@code null} annotation is ignored.
 *
 * @param annotation the annotation to shift; may be {@code null}
 * @param offset the offset as computed by (copy - original)
 */
private static void updateAnnotation(Annotation annotation, int offset) {
  if (annotation != null) {
    int shiftedBegin = annotation.getBegin() + offset;
    annotation.setBegin(shiftedBegin);
    int shiftedEnd = annotation.getEnd() + offset;
    annotation.setEnd(shiftedEnd);
  }
}
/** Returns the begin offset of the given annotation. */
@Override
public Integer apply(Annotation input) {
  return Integer.valueOf(input.getBegin());
}
/** Returns the text covered by the given annotation. */
@Override
public String apply(Annotation input) {
  String coveredText = input.getCoveredText();
  return coveredText;
}
/**
 * Writes {@code value} into the given string feature of the annotation.
 *
 * @param cas not used by this method
 * @param annotation the annotation to modify
 * @param feature the feature to set
 * @param value the string value to store
 */
private void update(JCas cas, Annotation annotation, Feature feature, String value) {
  annotation.setStringValue(feature, value);
}
/**
 * Tells whether two annotations have identical begin and end offsets.
 *
 * @param anno1 the first annotation
 * @param anno2 the second annotation
 * @return {@code true} if both begin offsets and both end offsets are equal
 */
public static boolean sameIndexes(Annotation anno1, Annotation anno2) {
  if (anno1.getBegin() != anno2.getBegin()) {
    return false;
  }
  return anno1.getEnd() == anno2.getEnd();
}
/**
 * Tells whether the span of {@code subAnnotation} lies within the span of {@code container}
 * (identical boundaries count as contained).
 *
 * @param container the annotation that should enclose the other one
 * @param subAnnotation the annotation that should be enclosed
 * @return {@code true} if the container begins at or before and ends at or after the
 *     sub-annotation
 */
public static boolean contains(Annotation container, Annotation subAnnotation) {
  if (container.getBegin() > subAnnotation.getBegin()) {
    return false;
  }
  return container.getEnd() >= subAnnotation.getEnd();
}
/**
 * Recursively builds a Stanford {@link Tree} from the given root annotation.
 *
 * <p>A {@code Constituent} that is not a leaf becomes an inner node labelled with its constituent
 * type, with one child tree per entry of its children array. Any other annotation is treated as
 * a {@code Token}: a leaf holding the covered text is created, the character gaps to the
 * preceding and following token are stored on the leaf label as runs of spaces (before/after
 * annotations), and the leaf is wrapped in a preterminal node labelled with the POS value.
 *
 * @param root the annotation to convert (a constituent or a token)
 * @param tFact the factory used to create tree nodes and leaves
 * @return the tree rooted at the node created for {@code root}
 * @throws IllegalStateException if the JCas cannot be obtained from the annotation's CAS; the
 *     underlying {@code CASException} is attached as the cause
 */
public static Tree createStanfordTree(Annotation root, TreeFactory tFact) {
  JCas aJCas;
  try {
    aJCas = root.getCAS().getJCas();
  } catch (CASException e) {
    // Keep the original exception as cause so the real reason is not lost.
    throw new IllegalStateException("Unable to get JCas from JCas wrapper", e);
  }

  // define the new (root) node
  Tree rootNode;

  // before we can create a node, we must check if we have any children (we have to know
  // whether to create a node or a leaf - not very dynamic)
  if (root instanceof Constituent && !isLeaf((Constituent) root)) {
    Constituent node = (Constituent) root;
    List<Tree> childNodes = new ArrayList<Tree>();

    // get childNodes from child annotations
    FSArray children = node.getChildren();
    for (int i = 0; i < children.size(); i++) {
      childNodes.add(createStanfordTree(node.getChildren(i), tFact));
    }

    // now create the node with its children
    rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);
  } else {
    // Handle leaf annotations.
    // Leaves are always Token annotations. We also have to insert a preterminal node with the
    // value of the POS annotation on the token, because the POS is not directly stored within
    // the tree.
    Token wordAnnotation = (Token) root;

    // create leaf-node for the tree
    Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());

    // create information about preceding and trailing whitespaces in the leaf node
    StringBuilder preWhitespaces = new StringBuilder();
    StringBuilder trailWhitespaces = new StringBuilder();

    List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
    List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

    if (precedingTokenList.size() > 0) {
      Token precedingToken = precedingTokenList.get(0);
      int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
      for (int i = 0; i < precedingWhitespaces; i++) {
        preWhitespaces.append(" ");
      }
    }

    if (followingTokenList.size() > 0) {
      Token followingToken = followingTokenList.get(0);
      int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
      for (int i = 0; i < trailingWhitespaces; i++) {
        trailWhitespaces.append(" ");
      }
    }

    // write whitespace information as CoreAnnotation.BeforeAnnotation and
    // CoreAnnotation.AfterAnnotation to the node label
    ((CoreLabel) wordNode.label())
        .set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
    ((CoreLabel) wordNode.label())
        .set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

    // get the POS annotation covered by the token
    List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation);
    // the POS should only cover one token
    assert coveredPos.size() == 1;
    POS pos = coveredPos.get(0);

    // create POS-node in the tree and attach word-node to it
    rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] {wordNode})));
  }

  return rootNode;
}
/** * Recreates a Stanford Tree from the StanfordParser annotations and saves all * non-StanfordParser-Annotations within the scope of the sentence in the label of the best * fitting node. * * <p><strong>CAUTION: </strong><i>This method is intended for the use by CAS Multipliers, which * create new CASes from this tree. The annotation-spans in the source-CAS will be changed!!!!!! * You do NOT want to use the source CAS after this method has been called. The * createStanfordTree()-method does not change the CAS, so use this instead, if the annotations do * not have to be recovered or accessed in the tree.</i> * * <p>TODO: This behavior could be changed by making COPIES of the annotations and changing the * copied instead of the originals. However, in order to being able to make copies, a dummy CAS * must be introduced to which the annotations can be copied. When they are recovered, they will * be copied to the new destination CAS anyway. * * @param root the ROOT annotation * @return an {@link Tree} object representing the syntax structure of the sentence * @throws CASException if the JCas cannot be accessed. 
*/ public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException { JCas aJCas = root.getCAS().getJCas(); // Create tree Tree tree = createStanfordTree(root); // Get all non-parser related annotations // and all tokens (needed for span-calculations later on) List<Annotation> nonParserAnnotations = new ArrayList<Annotation>(); List<Token> tokens = new ArrayList<Token>(); // Using getCoveredAnnotations instead of iterate, because subiterators did not work in all // cases List<Annotation> annosWithinRoot = JCasUtil.selectCovered(aJCas, Annotation.class, root); for (Annotation curAnno : annosWithinRoot) { if (!(curAnno instanceof POS) && !(curAnno instanceof Constituent) && !(curAnno instanceof Dependency) && !(curAnno instanceof PennTree) && !(curAnno instanceof Lemma) && !(curAnno instanceof Token) && !(curAnno instanceof DocumentMetaData)) { nonParserAnnotations.add(curAnno); } else if (curAnno instanceof Token) { tokens.add((Token) curAnno); } } // create wrapper for tree and its tokens TreeWithTokens annoTree = new TreeWithTokens(tree, tokens); /* * Add annotations to the best-fitting nodes. The best-fitting node for an annotation is the * deepest node in the tree that still completely contains the annotation. 
*/ for (Annotation curAnno : nonParserAnnotations) { // get best fitting node Tree bestFittingNode = annoTree.getBestFit(curAnno); // Add annotation to node if (bestFittingNode != null) { // translate annotation span to a value relative to the // node-span IntPair span = annoTree.getSpan(bestFittingNode); curAnno.setBegin(curAnno.getBegin() - span.getSource()); curAnno.setEnd(curAnno.getEnd() - span.getSource()); // get the collection from the label of the best-fitting node in which we store UIMA // annotations or create it, if it does not exist Collection<Annotation> annotations = ((CoreLabel) bestFittingNode.label()).get(UIMAAnnotations.class); if (annotations == null) { annotations = new ArrayList<Annotation>(); } // add annotation + checksum of annotated text to list and write it back to node // label annotations.add(curAnno); ((CoreLabel) bestFittingNode.label()).set(UIMAAnnotations.class, annotations); } } return tree; }
/**
 * Prints the simple class name and the covered text of the given annotation to standard output
 * (the console).
 *
 * @param anAnnotation the annotation to print
 */
public static void echo(Annotation anAnnotation) {
  String typeName = anAnnotation.getClass().getSimpleName();
  String coveredText = anAnnotation.getCoveredText();
  System.out.printf("type>%s<\t\tcoveredText>%s<\n", typeName, coveredText);
}