/**
 * Orders annotations by ascending begin offset. Annotations that begin at the same offset are
 * ordered by descending end offset, i.e. the one that ends later sorts first.
 *
 * @param arg0 the first annotation to compare
 * @param arg1 the second annotation to compare
 * @return a negative value, zero, or a positive value if {@code arg0} sorts before, together
 *     with, or after {@code arg1}
 */
@Override
public int compare(AnnotationFS arg0, AnnotationFS arg1) {
  // Integer.compare rather than subtraction: the difference of two ints can overflow and
  // report the wrong sign.
  int byBegin = Integer.compare(arg0.getBegin(), arg1.getBegin());
  if (byBegin != 0) {
    return byBegin;
  }
  // Same begin offset: operands are swapped on purpose to get descending end order.
  return Integer.compare(arg1.getEnd(), arg0.getEnd());
}
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { // sentenceCount = 0; tokenCount = 0; String text = jcas.getDocumentText(); String[] zones = getZoneTypes(); if (isStrictZoning()) { if (zones == null || zones.length == 0) { process(jcas, text.substring(0, text.length()), 0); } else if (zones.length != 1) { throw new AnalysisEngineProcessException( new IllegalStateException("Strict zoning cannot use multiple zone types")); } else { CAS cas = jcas.getCas(); for (AnnotationFS zone : select(cas, getType(cas, zones[0]))) { int[] adjusted = limit(text, zone.getBegin(), zone.getEnd()); process(jcas, text.substring(adjusted[0], adjusted[1]), adjusted[0]); } } } else { // This set collects all zone boundaries. SortedSet<Integer> boundarySet = new TreeSet<Integer>(); boundarySet.add(0); // Add start boundary boundarySet.add(text.length()); // Add end boundary // If zoneTypes have been define then get the boundaries, otherwise we will // simply have one big zone covering the whole document. if (zones != null) { // Iterate over all the zone indices and create sentences respecting // the zone boundaries. If the zoneTypes overlap... well... bad luck! for (String zoneName : zones) { CAS cas = jcas.getCas(); for (AnnotationFS zone : select(cas, getType(cas, zoneName))) { int[] adjusted = limit(text, zone.getBegin(), zone.getEnd()); boundarySet.add(adjusted[0]); boundarySet.add(adjusted[1]); } } } // Now process all zoneTypes. There will be at least two entries in the // boundary set (see above). Iterator<Integer> bi = boundarySet.iterator(); int begin = bi.next(); while (bi.hasNext()) { int end = bi.next(); process(jcas, text.substring(begin, end), begin); begin = end; } } }
/**
 * Advances to the next annotation and exposes it as the current token: its covered text becomes
 * the term and its (corrected) begin/end offsets become the token offsets.
 *
 * @return {@code true} if a token was produced, {@code false} once the annotations are exhausted
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
  // The annotation iterator is created lazily on first use.
  if (iterator == null) {
    initializeIterator();
  }

  if (!iterator.hasNext()) {
    return false;
  }

  clearAttributes();
  AnnotationFS annotation = iterator.next();
  termAttr.append(annotation.getCoveredText());

  int startOffset = correctOffset(annotation.getBegin());
  int endOffset = correctOffset(annotation.getEnd());
  offsetAttr.setOffset(startOffset, endOffset);
  return true;
}
/** * Creates a Stem annotation with same begin and end as the AnnotationFS fs, the value is the * stemmed value derived by applying the featurepath. * * @param jcas the JCas * @param fs the AnnotationFS where the Stem annotation is created * @throws AnalysisEngineProcessException if the {@code stem} method from the snowball stemmer * cannot be invoked. */ private void createStemAnnotation(JCas jcas, AnnotationFS fs) throws AnalysisEngineProcessException { // Check for blank text, it makes no sense to add a stem then (and raised an exception) String value = fp.getValue(fs); if (!StringUtils.isBlank(value)) { if (lowerCase) { // Fixme - should use locale/language defined in CAS. value = value.toLowerCase(Locale.US); } Stem stemAnnot = new Stem(jcas, fs.getBegin(), fs.getEnd()); SnowballProgram programm = getSnowballProgram(jcas); programm.setCurrent(value); try { // The patched snowball from Lucene has this as a method on SnowballProgram // but if we have some other snowball also in the classpath, Java might // choose to use the other. So to be safe, we use a reflection here. // -- REC, 2011-04-17 MethodUtils.invokeMethod(programm, "stem", null); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } stemAnnot.setValue(programm.getCurrent()); stemAnnot.addToIndexes(jcas); // Try setting the "stem" feature on Tokens. Feature feat = fs.getType().getFeatureByBaseName("stem"); if (feat != null && feat.getRange() != null && jcas.getTypeSystem().subsumes(feat.getRange(), stemAnnot.getType())) { fs.setFeatureValue(feat, stemAnnot); } } }
private void updateCrossSentAnnoList( Map<Integer, Integer> segmentBeginEnd, Map<String, JCas> jCases, List<Type> entryTypes) { crossSentenceLists = new HashMap<>(); for (Integer begin : segmentBeginEnd.keySet()) { int thisSent = -1; Set<Integer> crossSents = new HashSet<>(); for (Type t : entryTypes) { for (JCas c : jCases.values()) { if (thisSent == -1) { thisSent = BratAjaxCasUtil.getSentenceNumber(c, begin); } // update cross-sentence annotation lists for (AnnotationFS fs : selectCovered(c.getCas(), t, this.begin, end)) { // CASE 1. annotation begins here if (fs.getBegin() >= begin && fs.getBegin() <= segmentBeginEnd.get(begin)) { if (fs.getEnd() > segmentBeginEnd.get(begin) || fs.getEnd() < begin) { Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd()); int thatSent = BratAjaxCasUtil.getSentenceNumber(c, s.getBegin()); crossSents.add(thatSent); } } // CASE 2. Annotation ends here else if (fs.getEnd() >= begin && fs.getEnd() <= segmentBeginEnd.get(begin)) { if (fs.getBegin() > segmentBeginEnd.get(begin) || fs.getBegin() < begin) { int thatSent = BratAjaxCasUtil.getSentenceNumber(c, fs.getBegin()); crossSents.add(thatSent); } } } for (AnnotationFS fs : selectCovered(c.getCas(), t, begin, end)) { if (fs.getBegin() <= segmentBeginEnd.get(begin) && fs.getEnd() > segmentBeginEnd.get(begin)) { Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd()); segmentBeginEnd.put(begin, s.getEnd()); } } } } crossSentenceLists.put(thisSent, crossSents); } }
/**
 * Checks whether the given annotation spans exactly the range held by this object.
 *
 * @param anno the annotation to compare against
 * @return {@code true} if both the begin and the end offset of {@code anno} equal this object's
 *     {@code begin} and {@code end}
 */
public boolean isIdenticalWith(AnnotationFS anno) {
  if (anno.getBegin() != begin) {
    return false;
  }
  return anno.getEnd() == end;
}
/**
 * Creates offsets from an annotation by delegating to the two-argument constructor with the
 * annotation's begin and end offsets.
 *
 * @param anno the annotation whose begin and end offsets are used
 */
public Offsets(AnnotationFS anno) { this(anno.getBegin(), anno.getEnd()); }
/** * Add annotations from the CAS, which is controlled by the window size, to the brat response * {@link GetDocumentResponse} * * @param aJcas The JCAS object containing annotations * @param aResponse A brat response containing annotations in brat protocol * @param aBratAnnotatorModel Data model for brat annotations * @param aColoringStrategy the coloring strategy to render this layer (ignored) */ @Override public void render( JCas aJcas, List<AnnotationFeature> aFeatures, GetDocumentResponse aResponse, BratAnnotatorModel aBratAnnotatorModel, ColoringStrategy aColoringStrategy) { // Get begin and end offsets of window content int windowBegin = BratAjaxCasUtil.selectByAddr( aJcas, Sentence.class, aBratAnnotatorModel.getSentenceAddress()) .getBegin(); int windowEnd = BratAjaxCasUtil.selectByAddr( aJcas, Sentence.class, BratAjaxCasUtil.getLastSentenceAddressInDisplayWindow( aJcas, aBratAnnotatorModel.getSentenceAddress(), aBratAnnotatorModel.getPreferences().getWindowSize())) .getEnd(); // Find the features for the arc and span labels - it is possible that we do not find a // feature for arc/span labels because they may have been disabled. AnnotationFeature spanLabelFeature = null; AnnotationFeature arcLabelFeature = null; for (AnnotationFeature f : aFeatures) { if (WebAnnoConst.COREFERENCE_TYPE_FEATURE.equals(f.getName())) { spanLabelFeature = f; } if (WebAnnoConst.COREFERENCE_RELATION_FEATURE.equals(f.getName())) { arcLabelFeature = f; } } // At this point arc and span feature labels must have been found! If not, the later code // will crash. 
Type chainType = getAnnotationType(aJcas.getCas()); Feature chainFirst = chainType.getFeatureByBaseName(chainFirstFeatureName); int colorIndex = 0; // Iterate over the chains for (FeatureStructure chainFs : selectFS(aJcas.getCas(), chainType)) { AnnotationFS linkFs = (AnnotationFS) chainFs.getFeatureValue(chainFirst); AnnotationFS prevLinkFs = null; // Every chain is supposed to have a different color String color = ColoringStrategy.PALETTE_NORMAL_FILTERED[ colorIndex % ColoringStrategy.PALETTE_NORMAL_FILTERED.length]; // The color index is updated even for chains that have no visible links in the current // window because we would like the chain color to be independent of visibility. In // particular the color of a chain should not change when switching pages/scrolling. colorIndex++; // Iterate over the links of the chain while (linkFs != null) { Feature linkNext = linkFs.getType().getFeatureByBaseName(linkNextFeatureName); AnnotationFS nextLinkFs = (AnnotationFS) linkFs.getFeatureValue(linkNext); // Is link after window? If yes, we can skip the rest of the chain if (linkFs.getBegin() >= windowEnd) { break; // Go to next chain } // Is link before window? We only need links that being within the window and that // end within the window if (!(linkFs.getBegin() >= windowBegin) && (linkFs.getEnd() <= windowEnd)) { // prevLinkFs remains null until we enter the window linkFs = nextLinkFs; continue; // Go to next link } String bratTypeName = TypeUtil.getBratTypeName(this); // Render span { String bratLabelText = TypeUtil.getBratLabelText( this, linkFs, (spanLabelFeature != null) ? 
asList(spanLabelFeature) : Collections.EMPTY_LIST); Offsets offsets = new Offsets(linkFs.getBegin() - windowBegin, linkFs.getEnd() - windowBegin); aResponse.addEntity( new Entity( BratAjaxCasUtil.getAddr(linkFs), bratTypeName, offsets, bratLabelText, color)); } // Render arc (we do this on prevLinkFs because then we easily know that the current // and last link are within the window ;) if (prevLinkFs != null) { String bratLabelText = null; if (linkedListBehavior && arcLabelFeature != null) { // Render arc label bratLabelText = TypeUtil.getBratLabelText(this, prevLinkFs, asList(arcLabelFeature)); } else { // Render only chain type bratLabelText = TypeUtil.getBratLabelText(this, prevLinkFs, Collections.EMPTY_LIST); } List<Argument> argumentList = asList( new Argument("Arg1", BratAjaxCasUtil.getAddr(prevLinkFs)), new Argument("Arg2", BratAjaxCasUtil.getAddr(linkFs))); aResponse.addRelation( new Relation( BratAjaxCasUtil.getAddr(prevLinkFs), bratTypeName, argumentList, bratLabelText, color)); } // if (BratAjaxCasUtil.isSame(linkFs, nextLinkFs)) { // log.error("Loop in CAS detected, aborting rendering of chains"); // break; // } prevLinkFs = linkFs; linkFs = nextLinkFs; } } }
/** Copied and modified from {@link org.apache.uima.util.CasToInlineXml} */ private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false); // get document text String docText = cas.getDocumentText(); char[] docCharArray = docText.toCharArray(); // get iterator over annotations sorted by increasing start position and // decreasing end position FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator(); // This is basically a recursive algorithm that has had the recursion // removed through the use of an explicit Stack. We iterate over the // annotations, and if an annotation contains other annotations, we // push the parent annotation on the stack, process the children, and // then come back to the parent later. List<AnnotationFS> stack = new ArrayList<AnnotationFS>(); int pos = 0; ContentHandler handler = sax2xml.getContentHandler(); handler.startDocument(); // write the start tag converter.startRootElement(handler); // now use null is a placeholder for this artificial Document annotation AnnotationFS curAnnot = null; while (iterator.isValid()) { AnnotationFS nextAnnot = iterator.get(); if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) { // nextAnnot's start point is within the span of curAnnot if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) // crossover span check { // nextAnnot is contained within curAnnot // write text between current pos and beginning of nextAnnot try { handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos); pos = nextAnnot.getBegin(); converter.startAnnotationElement(nextAnnot, handler); // push parent annotation on stack stack.add(curAnnot); // move on to next annotation curAnnot = nextAnnot; } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + nextAnnot.getBegin() + 
"," + nextAnnot.getEnd() + " in document of length " + docText.length()); } } iterator.moveToNext(); } else { // nextAnnot begins after curAnnot ends // write text between current pos and end of curAnnot try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + " in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); // pop next containing annotation off stack curAnnot = stack.remove(stack.size() - 1); } } // finished writing all start tags, now finish up if (curAnnot != null) { try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + "in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); while (!stack.isEmpty()) { curAnnot = stack.remove(stack.size() - 1); // pop if (curAnnot == null) { break; } try { handler.characters(docCharArray, pos, curAnnot.getEnd() - pos); pos = curAnnot.getEnd(); } catch (StringIndexOutOfBoundsException e) { System.err.println( "Invalid annotation range: " + curAnnot.getBegin() + "," + curAnnot.getEnd() + "in document of length " + docText.length()); } converter.endAnnotationElement(curAnnot, handler); } } if (pos < docCharArray.length) { handler.characters(docCharArray, pos, docCharArray.length - pos); } converter.endRootElement(handler); handler.endDocument(); // return XML string return new String(byteArrayOutputStream.toByteArray()); }