Ejemplo n.º 1
0
 @Override
 public int compare(AnnotationFS arg0, AnnotationFS arg1) {
   int beginDiff = arg0.getBegin() - arg1.getBegin();
   if (beginDiff == 0) {
     return arg1.getEnd() - arg0.getEnd();
   } else {
     return beginDiff;
   }
 }
Ejemplo n.º 2
0
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    // sentenceCount = 0;
    tokenCount = 0;

    String text = jcas.getDocumentText();

    String[] zones = getZoneTypes();
    if (isStrictZoning()) {
      if (zones == null || zones.length == 0) {
        process(jcas, text.substring(0, text.length()), 0);
      } else if (zones.length != 1) {
        throw new AnalysisEngineProcessException(
            new IllegalStateException("Strict zoning cannot use multiple zone types"));
      } else {
        CAS cas = jcas.getCas();
        for (AnnotationFS zone : select(cas, getType(cas, zones[0]))) {
          int[] adjusted = limit(text, zone.getBegin(), zone.getEnd());
          process(jcas, text.substring(adjusted[0], adjusted[1]), adjusted[0]);
        }
      }
    } else {
      // This set collects all zone boundaries.
      SortedSet<Integer> boundarySet = new TreeSet<Integer>();
      boundarySet.add(0); // Add start boundary
      boundarySet.add(text.length()); // Add end boundary

      // If zoneTypes have been define then get the boundaries, otherwise we will
      // simply have one big zone covering the whole document.
      if (zones != null) {
        // Iterate over all the zone indices and create sentences respecting
        // the zone boundaries. If the zoneTypes overlap... well... bad luck!
        for (String zoneName : zones) {
          CAS cas = jcas.getCas();
          for (AnnotationFS zone : select(cas, getType(cas, zoneName))) {
            int[] adjusted = limit(text, zone.getBegin(), zone.getEnd());
            boundarySet.add(adjusted[0]);
            boundarySet.add(adjusted[1]);
          }
        }
      }

      // Now process all zoneTypes. There will be at least two entries in the
      // boundary set (see above).
      Iterator<Integer> bi = boundarySet.iterator();
      int begin = bi.next();
      while (bi.hasNext()) {
        int end = bi.next();
        process(jcas, text.substring(begin, end), begin);
        begin = end;
      }
    }
  }
 @Override
 public boolean incrementToken() throws IOException {
   if (iterator == null) {
     initializeIterator();
   }
   if (iterator.hasNext()) {
     clearAttributes();
     AnnotationFS next = iterator.next();
     termAttr.append(next.getCoveredText());
     offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
     return true;
   } else {
     return false;
   }
 }
Ejemplo n.º 4
0
  private void updateCrossSentAnnoList(
      Map<Integer, Integer> segmentBeginEnd, Map<String, JCas> jCases, List<Type> entryTypes) {
    crossSentenceLists = new HashMap<>();
    for (Integer begin : segmentBeginEnd.keySet()) {
      int thisSent = -1;
      Set<Integer> crossSents = new HashSet<>();
      for (Type t : entryTypes) {
        for (JCas c : jCases.values()) {
          if (thisSent == -1) {
            thisSent = BratAjaxCasUtil.getSentenceNumber(c, begin);
          }
          // update cross-sentence annotation lists
          for (AnnotationFS fs : selectCovered(c.getCas(), t, this.begin, end)) {
            // CASE 1. annotation begins here
            if (fs.getBegin() >= begin && fs.getBegin() <= segmentBeginEnd.get(begin)) {
              if (fs.getEnd() > segmentBeginEnd.get(begin) || fs.getEnd() < begin) {
                Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd());
                int thatSent = BratAjaxCasUtil.getSentenceNumber(c, s.getBegin());
                crossSents.add(thatSent);
              }
            }
            // CASE 2. Annotation ends here
            else if (fs.getEnd() >= begin && fs.getEnd() <= segmentBeginEnd.get(begin)) {
              if (fs.getBegin() > segmentBeginEnd.get(begin) || fs.getBegin() < begin) {
                int thatSent = BratAjaxCasUtil.getSentenceNumber(c, fs.getBegin());
                crossSents.add(thatSent);
              }
            }
          }

          for (AnnotationFS fs : selectCovered(c.getCas(), t, begin, end)) {
            if (fs.getBegin() <= segmentBeginEnd.get(begin)
                && fs.getEnd() > segmentBeginEnd.get(begin)) {
              Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd());
              segmentBeginEnd.put(begin, s.getEnd());
            }
          }
        }
      }
      crossSentenceLists.put(thisSent, crossSents);
    }
  }
Ejemplo n.º 5
0
  /**
   * Creates a Stem annotation with same begin and end as the AnnotationFS fs, the value is the
   * stemmed value derived by applying the featurepath.
   *
   * @param jcas the JCas
   * @param fs the AnnotationFS where the Stem annotation is created
   * @throws AnalysisEngineProcessException if the {@code stem} method from the snowball stemmer
   *     cannot be invoked.
   */
  private void createStemAnnotation(JCas jcas, AnnotationFS fs)
      throws AnalysisEngineProcessException {
    // Check for blank text, it makes no sense to add a stem then (and raised an exception)
    String value = fp.getValue(fs);
    if (!StringUtils.isBlank(value)) {
      if (lowerCase) {
        // Fixme - should use locale/language defined in CAS.
        value = value.toLowerCase(Locale.US);
      }

      Stem stemAnnot = new Stem(jcas, fs.getBegin(), fs.getEnd());
      SnowballProgram programm = getSnowballProgram(jcas);
      programm.setCurrent(value);

      try {
        // The patched snowball from Lucene has this as a method on SnowballProgram
        // but if we have some other snowball also in the classpath, Java might
        // choose to use the other. So to be safe, we use a reflection here.
        // -- REC, 2011-04-17
        MethodUtils.invokeMethod(programm, "stem", null);
      } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
      }

      stemAnnot.setValue(programm.getCurrent());
      stemAnnot.addToIndexes(jcas);

      // Try setting the "stem" feature on Tokens.
      Feature feat = fs.getType().getFeatureByBaseName("stem");
      if (feat != null
          && feat.getRange() != null
          && jcas.getTypeSystem().subsumes(feat.getRange(), stemAnnot.getType())) {
        fs.setFeatureValue(feat, stemAnnot);
      }
    }
  }
Ejemplo n.º 6
0
 public boolean isIdenticalWith(AnnotationFS anno) {
   return anno.getBegin() == begin && anno.getEnd() == end;
 }
Ejemplo n.º 7
0
 public Offsets(AnnotationFS anno) {
   this(anno.getBegin(), anno.getEnd());
 }
Ejemplo n.º 8
0
  /**
   * Add annotations from the CAS, which is controlled by the window size, to the brat response
   * {@link GetDocumentResponse}
   *
   * @param aJcas The JCAS object containing annotations
   * @param aResponse A brat response containing annotations in brat protocol
   * @param aBratAnnotatorModel Data model for brat annotations
   * @param aColoringStrategy the coloring strategy to render this layer (ignored)
   */
  @Override
  public void render(
      JCas aJcas,
      List<AnnotationFeature> aFeatures,
      GetDocumentResponse aResponse,
      BratAnnotatorModel aBratAnnotatorModel,
      ColoringStrategy aColoringStrategy) {
    // Get begin and end offsets of window content
    int windowBegin =
        BratAjaxCasUtil.selectByAddr(
                aJcas, Sentence.class, aBratAnnotatorModel.getSentenceAddress())
            .getBegin();
    int windowEnd =
        BratAjaxCasUtil.selectByAddr(
                aJcas,
                Sentence.class,
                BratAjaxCasUtil.getLastSentenceAddressInDisplayWindow(
                    aJcas,
                    aBratAnnotatorModel.getSentenceAddress(),
                    aBratAnnotatorModel.getPreferences().getWindowSize()))
            .getEnd();

    // Find the features for the arc and span labels - it is possible that we do not find a
    // feature for arc/span labels because they may have been disabled.
    AnnotationFeature spanLabelFeature = null;
    AnnotationFeature arcLabelFeature = null;
    for (AnnotationFeature f : aFeatures) {
      if (WebAnnoConst.COREFERENCE_TYPE_FEATURE.equals(f.getName())) {
        spanLabelFeature = f;
      }
      if (WebAnnoConst.COREFERENCE_RELATION_FEATURE.equals(f.getName())) {
        arcLabelFeature = f;
      }
    }
    // At this point arc and span feature labels must have been found! If not, the later code
    // will crash.

    Type chainType = getAnnotationType(aJcas.getCas());
    Feature chainFirst = chainType.getFeatureByBaseName(chainFirstFeatureName);

    int colorIndex = 0;
    // Iterate over the chains
    for (FeatureStructure chainFs : selectFS(aJcas.getCas(), chainType)) {
      AnnotationFS linkFs = (AnnotationFS) chainFs.getFeatureValue(chainFirst);
      AnnotationFS prevLinkFs = null;

      // Every chain is supposed to have a different color
      String color =
          ColoringStrategy.PALETTE_NORMAL_FILTERED[
              colorIndex % ColoringStrategy.PALETTE_NORMAL_FILTERED.length];
      // The color index is updated even for chains that have no visible links in the current
      // window because we would like the chain color to be independent of visibility. In
      // particular the color of a chain should not change when switching pages/scrolling.
      colorIndex++;

      // Iterate over the links of the chain
      while (linkFs != null) {
        Feature linkNext = linkFs.getType().getFeatureByBaseName(linkNextFeatureName);
        AnnotationFS nextLinkFs = (AnnotationFS) linkFs.getFeatureValue(linkNext);

        // Is link after window? If yes, we can skip the rest of the chain
        if (linkFs.getBegin() >= windowEnd) {
          break; // Go to next chain
        }

        // Is link before window? We only need links that being within the window and that
        // end within the window
        if (!(linkFs.getBegin() >= windowBegin) && (linkFs.getEnd() <= windowEnd)) {
          // prevLinkFs remains null until we enter the window
          linkFs = nextLinkFs;
          continue; // Go to next link
        }

        String bratTypeName = TypeUtil.getBratTypeName(this);

        // Render span
        {
          String bratLabelText =
              TypeUtil.getBratLabelText(
                  this,
                  linkFs,
                  (spanLabelFeature != null) ? asList(spanLabelFeature) : Collections.EMPTY_LIST);
          Offsets offsets =
              new Offsets(linkFs.getBegin() - windowBegin, linkFs.getEnd() - windowBegin);

          aResponse.addEntity(
              new Entity(
                  BratAjaxCasUtil.getAddr(linkFs), bratTypeName, offsets, bratLabelText, color));
        }

        // Render arc (we do this on prevLinkFs because then we easily know that the current
        // and last link are within the window ;)
        if (prevLinkFs != null) {
          String bratLabelText = null;

          if (linkedListBehavior && arcLabelFeature != null) {
            // Render arc label
            bratLabelText = TypeUtil.getBratLabelText(this, prevLinkFs, asList(arcLabelFeature));
          } else {
            // Render only chain type
            bratLabelText = TypeUtil.getBratLabelText(this, prevLinkFs, Collections.EMPTY_LIST);
          }

          List<Argument> argumentList =
              asList(
                  new Argument("Arg1", BratAjaxCasUtil.getAddr(prevLinkFs)),
                  new Argument("Arg2", BratAjaxCasUtil.getAddr(linkFs)));

          aResponse.addRelation(
              new Relation(
                  BratAjaxCasUtil.getAddr(prevLinkFs),
                  bratTypeName,
                  argumentList,
                  bratLabelText,
                  color));
        }

        //                if (BratAjaxCasUtil.isSame(linkFs, nextLinkFs)) {
        //                    log.error("Loop in CAS detected, aborting rendering of chains");
        //                    break;
        //                }

        prevLinkFs = linkFs;
        linkFs = nextLinkFs;
      }
    }
  }
Ejemplo n.º 9
0
  /** Copied and modified from {@link org.apache.uima.util.CasToInlineXml} */
  private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false);

    // get document text
    String docText = cas.getDocumentText();
    char[] docCharArray = docText.toCharArray();

    // get iterator over annotations sorted by increasing start position and
    // decreasing end position
    FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator();

    // This is basically a recursive algorithm that has had the recursion
    // removed through the use of an explicit Stack. We iterate over the
    // annotations, and if an annotation contains other annotations, we
    // push the parent annotation on the stack, process the children, and
    // then come back to the parent later.
    List<AnnotationFS> stack = new ArrayList<AnnotationFS>();
    int pos = 0;

    ContentHandler handler = sax2xml.getContentHandler();
    handler.startDocument();
    // write the start tag
    converter.startRootElement(handler);
    // now use null is a placeholder for this artificial Document annotation
    AnnotationFS curAnnot = null;

    while (iterator.isValid()) {
      AnnotationFS nextAnnot = iterator.get();

      if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) {
        // nextAnnot's start point is within the span of curAnnot
        if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) // crossover span check
        {
          // nextAnnot is contained within curAnnot

          // write text between current pos and beginning of nextAnnot
          try {
            handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos);
            pos = nextAnnot.getBegin();
            converter.startAnnotationElement(nextAnnot, handler);

            // push parent annotation on stack
            stack.add(curAnnot);
            // move on to next annotation
            curAnnot = nextAnnot;
          } catch (StringIndexOutOfBoundsException e) {
            System.err.println(
                "Invalid annotation range: "
                    + nextAnnot.getBegin()
                    + ","
                    + nextAnnot.getEnd()
                    + " in document of length "
                    + docText.length());
          }
        }
        iterator.moveToNext();
      } else {
        // nextAnnot begins after curAnnot ends
        // write text between current pos and end of curAnnot
        try {
          handler.characters(docCharArray, pos, curAnnot.getEnd() - pos);
          pos = curAnnot.getEnd();
        } catch (StringIndexOutOfBoundsException e) {
          System.err.println(
              "Invalid annotation range: "
                  + curAnnot.getBegin()
                  + ","
                  + curAnnot.getEnd()
                  + " in document of length "
                  + docText.length());
        }
        converter.endAnnotationElement(curAnnot, handler);

        // pop next containing annotation off stack
        curAnnot = stack.remove(stack.size() - 1);
      }
    }

    // finished writing all start tags, now finish up
    if (curAnnot != null) {
      try {
        handler.characters(docCharArray, pos, curAnnot.getEnd() - pos);
        pos = curAnnot.getEnd();
      } catch (StringIndexOutOfBoundsException e) {
        System.err.println(
            "Invalid annotation range: "
                + curAnnot.getBegin()
                + ","
                + curAnnot.getEnd()
                + "in document of length "
                + docText.length());
      }
      converter.endAnnotationElement(curAnnot, handler);

      while (!stack.isEmpty()) {
        curAnnot = stack.remove(stack.size() - 1); // pop
        if (curAnnot == null) {
          break;
        }
        try {
          handler.characters(docCharArray, pos, curAnnot.getEnd() - pos);
          pos = curAnnot.getEnd();
        } catch (StringIndexOutOfBoundsException e) {
          System.err.println(
              "Invalid annotation range: "
                  + curAnnot.getBegin()
                  + ","
                  + curAnnot.getEnd()
                  + "in document of length "
                  + docText.length());
        }
        converter.endAnnotationElement(curAnnot, handler);
      }
    }

    if (pos < docCharArray.length) {
      handler.characters(docCharArray, pos, docCharArray.length - pos);
    }
    converter.endRootElement(handler);
    handler.endDocument();

    // return XML string
    return new String(byteArrayOutputStream.toByteArray());
  }