Java AnnotationFS 예제들, org.apache.uima.cas.text.AnnotationFS Java 예제들

예제 #1

0

파일 보기

파일: ChainAdapter.java 프로젝트: sholschneider/webanno

 /**
  * Orders annotations by ascending begin offset; annotations sharing the same begin offset are
  * ordered by descending end offset, so the longer one comes first.
  *
  * @param arg0 the first annotation
  * @param arg1 the second annotation
  * @return a negative value, zero, or a positive value as {@code arg0} sorts before, equal to, or
  *     after {@code arg1}
  */
 @Override
 public int compare(AnnotationFS arg0, AnnotationFS arg1) {
   // Integer.compare avoids the overflow pitfalls of comparing by subtraction.
   int byBegin = Integer.compare(arg0.getBegin(), arg1.getBegin());
   if (byBegin != 0) {
     return byBegin;
   }
   // Same begin: operands are swapped on purpose so the larger end sorts first.
   return Integer.compare(arg1.getEnd(), arg0.getEnd());
 }

예제 #2

0

파일 보기

파일: AbstractNameFinder.java 프로젝트: JJColeman/jcoleman_Capstone

  /**
   * Performs name finding on the given cas object.
   *
   * <p>If {@code isRemoveExistingAnnotations} is set, every name annotation found under a sentence
   * is first removed from the indexes. Then, sentence by sentence, the covered text of each token
   * is passed to {@code find}; each returned span is turned into a name annotation that stretches
   * from the begin of its first token to the end of its last token, is added to the index
   * repository, and is handed to {@code postProcessAnnotations}. {@code documentDone} is called
   * once at the end.
   *
   * @param cas the CAS to read sentences and tokens from and to add name annotations to
   */
  public final void process(CAS cas) {

    if (isRemoveExistingAnnotations) {
      final AnnotationComboIterator sentenceNameCombo =
          new AnnotationComboIterator(cas, mSentenceType, mNameType);

      // Collect first and remove afterwards (presumably so the indexes are not modified while
      // they are being iterated).
      List<AnnotationFS> removeAnnotations = new LinkedList<AnnotationFS>();
      for (AnnotationIteratorPair annotationIteratorPair : sentenceNameCombo) {
        for (AnnotationFS nameAnnotation : annotationIteratorPair.getSubIterator()) {
          removeAnnotations.add(nameAnnotation);
        }
      }

      for (AnnotationFS annotation : removeAnnotations) {
        cas.removeFsFromIndexes(annotation);
      }
    }

    final AnnotationComboIterator sentenceTokenCombo =
        new AnnotationComboIterator(cas, mSentenceType, mTokenType);

    for (AnnotationIteratorPair annotationIteratorPair : sentenceTokenCombo) {

      final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();
      final List<String> sentenceTokenList = new LinkedList<String>();

      for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {
        sentenceTokenAnnotationList.add(tokenAnnotation);
        sentenceTokenList.add(tokenAnnotation.getCoveredText());
      }

      // Spans address tokens by index. Indexing a LinkedList is linear per lookup, so take an
      // array snapshot once and index into that instead.
      final AnnotationFS[] sentenceTokens =
          sentenceTokenAnnotationList.toArray(
              new AnnotationFS[sentenceTokenAnnotationList.size()]);

      Span[] names = find(cas, sentenceTokenList.toArray(new String[sentenceTokenList.size()]));

      AnnotationFS[] nameAnnotations = new AnnotationFS[names.length];

      for (int i = 0; i < names.length; i++) {
        // The span end is used as an exclusive token index, hence the "- 1" for the last token.
        int startIndex = sentenceTokens[names[i].getStart()].getBegin();
        int endIndex = sentenceTokens[names[i].getEnd() - 1].getEnd();

        nameAnnotations[i] = cas.createAnnotation(mNameType, startIndex, endIndex);

        cas.getIndexRepository().addFS(nameAnnotations[i]);
      }

      postProcessAnnotations(names, nameAnnotations);
    }

    documentDone(cas);
  }

예제 #3

0

파일 보기

파일: PosFilter.java 프로젝트: renaud/dkpro-core

  /**
   * Looks up the single annotation of the requested type among those that
   * {@code CasUtil.selectCovered} returns for the given annotation.
   *
   * <p>If the lookup yields anything other than exactly one annotation, a warning is logged and
   * {@code null} is returned.
   *
   * @param type the type of annotation to look for
   * @param annotation the annotation whose span is searched
   * @return the unique matching annotation, or {@code null} if there is none or more than one
   */
  private AnnotationFS getAnnotation(Type type, AnnotationFS annotation) {
    List<AnnotationFS> covered = CasUtil.selectCovered(annotation.getCAS(), type, annotation);
    if (covered.size() == 1) {
      return covered.get(0);
    }

    // Zero or several candidates: report it and let the caller deal with the missing match.
    getLogger()
        .warn(
            "Could not find matching annotation of type "
                + type
                + " for annotation: "
                + annotation.getCoveredText());
    return null;
  }

예제 #4

0

파일 보기

파일: UIMAAnnotationsTokenizer.java 프로젝트: RainingWang/lucene-solr

 /**
  * Advances to the next annotation, if any, and exposes its covered text and corrected offsets
  * through the term and offset attributes. The iterator is initialized on first use.
  *
  * @return {@code true} if a token was produced, {@code false} once the iterator is exhausted
  * @throws IOException declared by the overridden method
  */
 @Override
 public boolean incrementToken() throws IOException {
   if (iterator == null) {
     initializeIterator();
   }
   if (!iterator.hasNext()) {
     return false;
   }

   clearAttributes();
   AnnotationFS current = iterator.next();
   termAttr.append(current.getCoveredText());
   int startOffset = correctOffset(current.getBegin());
   int endOffset = correctOffset(current.getEnd());
   offsetAttr.setOffset(startOffset, endOffset);
   return true;
 }

예제 #5

0

파일 보기

파일: MalletTopicModelEstimator.java 프로젝트: mjunsilo/dkpro-core

  /**
   * Collects the token strings for an annotation.
   *
   * <p>With {@code useLemma} set, the values of the {@code Lemma} annotations selected via
   * {@code selectCovered} are returned; otherwise the covered text of the annotation itself is
   * returned. Strings shorter than {@code minTokenLength} are left out in both cases.
   *
   * @param annotation the annotation to extract tokens from
   * @param useLemma whether to use lemma values instead of the covered text
   * @param minTokenLength the minimum length a string must have to be kept
   * @return the collected strings, possibly empty
   */
  private static Collection<String> getTokensFromAnnotation(
      AnnotationFS annotation, boolean useLemma, int minTokenLength) {
    if (!useLemma) {
      Collection<String> single = new ArrayList<>(1);
      String covered = annotation.getCoveredText();
      if (covered.length() >= minTokenLength) {
        single.add(covered);
      }
      return single;
    }

    Collection<String> lemmas = new ArrayList<>();

    /* alternative that concatenates multiple lemmas: */
    // selectCovered(Lemma.class, annotation).stream()
    // .map(lemma -> lemma.getValue())
    // .filter(lemma -> lemma.length() >= minTokenLength)
    // .reduce((l1, l2) -> l1 + "_" + l2)
    // .ifPresent(token -> tokens.add(token));

    for (Lemma lemma : selectCovered(Lemma.class, annotation)) {
      String value = lemma.getValue();
      if (value.length() < minTokenLength) {
        continue;
      }
      lemmas.add(value);
    }
    return lemmas;
  }

예제 #6

0

파일 보기

파일: SnowballStemmer.java 프로젝트: renaud/dkpro-core

  /**
   * Adds a Stem annotation spanning the same offsets as {@code fs}. The stem value is obtained by
   * running the snowball program on the value the feature path yields for {@code fs}. Nothing is
   * created when that value is blank.
   *
   * <p>If the type of {@code fs} has a feature with base name "stem" whose range subsumes the
   * Stem type, that feature is set to the new annotation.
   *
   * @param jcas the JCas
   * @param fs the AnnotationFS for which the Stem annotation is created
   * @throws AnalysisEngineProcessException if the {@code stem} method of the snowball stemmer
   *     cannot be invoked
   */
  private void createStemAnnotation(JCas jcas, AnnotationFS fs)
      throws AnalysisEngineProcessException {
    String value = fp.getValue(fs);
    // Blank text cannot be stemmed meaningfully (and used to raise an exception), so skip it.
    if (StringUtils.isBlank(value)) {
      return;
    }

    if (lowerCase) {
      // Fixme - should use locale/language defined in CAS.
      value = value.toLowerCase(Locale.US);
    }

    Stem stemAnnot = new Stem(jcas, fs.getBegin(), fs.getEnd());
    SnowballProgram stemmer = getSnowballProgram(jcas);
    stemmer.setCurrent(value);

    try {
      // The patched snowball from Lucene offers stem() directly on SnowballProgram, but with
      // another snowball on the classpath Java might pick that one instead. Reflection keeps
      // this safe. -- REC, 2011-04-17
      MethodUtils.invokeMethod(stemmer, "stem", null);
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }

    stemAnnot.setValue(stemmer.getCurrent());
    stemAnnot.addToIndexes(jcas);

    // Try setting the "stem" feature on the source annotation (e.g. a Token).
    Feature stemFeature = fs.getType().getFeatureByBaseName("stem");
    boolean acceptsStem =
        stemFeature != null
            && stemFeature.getRange() != null
            && jcas.getTypeSystem().subsumes(stemFeature.getRange(), stemAnnot.getType());
    if (acceptsStem) {
      fs.setFeatureValue(stemFeature, stemAnnot);
    }
  }

예제 #7

0

파일 보기

파일: SampleEntityAnnotator.java 프로젝트: RainingWang/lucene-solr

  /**
   * Creates an entity annotation for every token whose POS tag equals {@code NP} or {@code NPS}.
   * The entity feature receives the covered text; the name feature receives "ORGANIZATION" for
   * the text "Apache" and "OTHER" for everything else.
   *
   * @param jcas the JCas to read tokens from and add entity annotations to
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
    Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
    Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);

    for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
      String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
      if (!NP.equals(tokenPOS) && !NPS.equals(tokenPOS)) {
        continue;
      }

      AnnotationFS entityAnnotation =
          jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());
      entityAnnotation.setStringValue(entityFeature, annotation.getCoveredText());

      // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
      String name = annotation.getCoveredText().equals("Apache") ? "ORGANIZATION" : "OTHER";
      entityAnnotation.setStringValue(nameFeature, name);

      jcas.addFsToIndexes(entityAnnotation);
    }
  }

예제 #8

0

파일 보기

파일: SegmenterBase.java 프로젝트: mjunsilo/dkpro-core

  /**
   * Splits the document text into zones and passes each zone to
   * {@code process(JCas, String, int)} together with its start offset.
   *
   * <p>With strict zoning, either the whole text is processed (no zone types configured) or each
   * annotation of the single configured zone type is processed on its own; configuring several
   * zone types is rejected. Without strict zoning, the begin and end of every zone annotation
   * (plus the document start and end) act as boundaries, and each stretch between two
   * neighbouring boundaries is processed.
   *
   * @param jcas the JCas to segment
   * @throws AnalysisEngineProcessException if strict zoning is combined with multiple zone types
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    // sentenceCount = 0;
    tokenCount = 0;

    String text = jcas.getDocumentText();
    String[] zones = getZoneTypes();

    if (isStrictZoning()) {
      boolean noZoneTypes = zones == null || zones.length == 0;
      if (noZoneTypes) {
        process(jcas, text.substring(0, text.length()), 0);
        return;
      }
      if (zones.length != 1) {
        throw new AnalysisEngineProcessException(
            new IllegalStateException("Strict zoning cannot use multiple zone types"));
      }

      CAS strictCas = jcas.getCas();
      for (AnnotationFS zone : select(strictCas, getType(strictCas, zones[0]))) {
        int[] range = limit(text, zone.getBegin(), zone.getEnd());
        process(jcas, text.substring(range[0], range[1]), range[0]);
      }
      return;
    }

    // Sorted collection of every zone boundary, seeded with the document start and end.
    SortedSet<Integer> boundarySet = new TreeSet<Integer>();
    boundarySet.add(0);
    boundarySet.add(text.length());

    // Without configured zone types there is just one big zone covering the whole document.
    if (zones != null) {
      // Gather the boundaries of all zone annotations. If the zoneTypes overlap... well... bad
      // luck!
      CAS cas = jcas.getCas();
      for (String zoneName : zones) {
        for (AnnotationFS zone : select(cas, getType(cas, zoneName))) {
          int[] range = limit(text, zone.getBegin(), zone.getEnd());
          boundarySet.add(range[0]);
          boundarySet.add(range[1]);
        }
      }
    }

    // Walk over neighbouring boundary pairs; the set always holds at least the start boundary.
    Iterator<Integer> boundaries = boundarySet.iterator();
    for (int zoneBegin = boundaries.next(); boundaries.hasNext(); ) {
      int zoneEnd = boundaries.next();
      process(jcas, text.substring(zoneBegin, zoneEnd), zoneBegin);
      zoneBegin = zoneEnd;
    }
  }

예제 #9

0

파일 보기

파일: PosFilter.java 프로젝트: renaud/dkpro-core

  /**
   * Removes the annotations selected by the {@code typeToRemove} feature path whose part-of-speech
   * class is not enabled.
   *
   * <p>The POS class is the short name of the type of the POS annotation: the selected annotation
   * itself if it is of the POS type, otherwise the one aligned POS annotation (annotations
   * without one are skipped). When a token is removed, its aligned stem, lemma and POS
   * annotations are removed too. When a stem, lemma or POS annotation is removed, the
   * corresponding feature on the aligned token is cleared.
   *
   * @param jcas the JCas to filter
   * @throws AnalysisEngineProcessException if the type to remove cannot be resolved or the
   *     feature path cannot be evaluated
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    getContext().getLogger().log(Level.CONFIG, "Entering " + this.getClass().getSimpleName());

    Type tokenType = jcas.getCas().getTypeSystem().getType(Token.class.getCanonicalName());
    Type stemType = jcas.getCas().getTypeSystem().getType(Stem.class.getCanonicalName());
    Type lemmaType = jcas.getCas().getTypeSystem().getType(Lemma.class.getCanonicalName());
    Type posType = jcas.getCas().getTypeSystem().getType(POS.class.getCanonicalName());
    Type typeToRemoveType = jcas.getCas().getTypeSystem().getType(typeToRemove);

    if (typeToRemoveType == null) {
      throw new AnalysisEngineProcessException(
          new Throwable("Could not get type for feature path: " + typeToRemove));
    }

    // Pass 1: decide which annotations have to go.
    List<AnnotationFS> toRemove = new ArrayList<AnnotationFS>();
    try {
      for (Entry<AnnotationFS, String> entry :
          FeaturePathFactory.select(jcas.getCas(), typeToRemove)) {
        AnnotationFS candidate = entry.getKey();

        AnnotationFS pos;
        if (typeToRemoveType.equals(posType)) {
          pos = candidate;
        } else {
          pos = getAnnotation(posType, candidate);
          if (pos == null) {
            continue;
          }
        }

        // A tag matches at most one of the classes below, so a single disjunction suffices.
        String tag = pos.getType().getShortName();
        boolean filteredOut =
            (tag.equals("ADJ") && !adj)
                || (tag.equals("ADV") && !adv)
                || (tag.equals("ART") && !art)
                || (tag.equals("CARD") && !card)
                || (tag.equals("CONJ") && !conj)
                || ((tag.equals("N") || tag.equals("NN") || tag.equals("NP")) && !n)
                || (tag.equals("O") && !o)
                || (tag.equals("PP") && !pp)
                || (tag.equals("PR") && !pr)
                || (tag.equals("PUNC") && !punc)
                || (tag.equals("V") && !v);
        if (filteredOut) {
          toRemove.add(candidate);
        }
      }
    } catch (FeaturePathException e) {
      throw new AnalysisEngineProcessException(e);
    }

    // Pass 2: remove them, keeping tokens and their companion annotations consistent.
    for (AnnotationFS fs : toRemove) {
      Type fsType = fs.getType();

      if (fsType.equals(tokenType)) {
        // If we want to remove tokens, we also remove accompanying stem, lemma, POS tag.
        for (Type companionType : new Type[] {stemType, lemmaType, posType}) {
          AnnotationFS companion = getAnnotation(companionType, fs);
          if (companion != null) {
            jcas.getCas().removeFsFromIndexes(companion);
          }
        }
      } else if (fsType.equals(stemType) || fsType.equals(lemmaType)) {
        // We don't want to keep the feature in the token, remove it here.
        Token owner = (Token) getAnnotation(tokenType, fs);
        if (owner != null) {
          String featureBaseName = fs.getType().getShortName().toLowerCase();
          Feature tokenFeature = tokenType.getFeatureByBaseName(featureBaseName);
          owner.setFeatureValue(tokenFeature, null);
        }
      } else if (fs instanceof POS) {
        Token owner = (Token) getAnnotation(tokenType, fs);
        if (owner != null) {
          owner.setPos(null);
        }
      }

      jcas.getCas().removeFsFromIndexes(fs);
    }
  }

예제 #10

0

파일 보기

파일: PoStagger.java 프로젝트: strategist922/Canova

  /**
   * Performs pos-tagging on the given tcas object.
   *
   * <p>For every sentence, the covered text of its tokens is tagged and the resulting tag is
   * written to the pos feature of each token. If a probability feature is configured, the
   * tagger's probability for each token is stored in that feature. At log level {@code FINER} the
   * tagged sentence is logged as {@code "token\tag token\tag ..."}.
   *
   * @param tcas the CAS whose tokens are tagged
   */
  @Override
  public synchronized void process(CAS tcas) {

    final AnnotationComboIterator comboIterator =
        new AnnotationComboIterator(tcas, this.sentenceType, this.tokenType);

    for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {

      final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();

      final List<String> sentenceTokenList = new LinkedList<String>();

      for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {

        sentenceTokenAnnotationList.add(tokenAnnotation);

        sentenceTokenList.add(tokenAnnotation.getCoveredText());
      }

      final List<String> posTags = this.posTagger.tag(sentenceTokenList);

      double[] posProbabilities = null;

      if (this.probabilityFeature != null) {
        posProbabilities = this.posTagger.probs();
      }

      final Iterator<String> posTagIterator = posTags.iterator();
      final Iterator<AnnotationFS> sentenceTokenIterator = sentenceTokenAnnotationList.iterator();

      int index = 0;
      while (posTagIterator.hasNext() && sentenceTokenIterator.hasNext()) {
        final String posTag = posTagIterator.next();
        final AnnotationFS tokenAnnotation = sentenceTokenIterator.next();

        tokenAnnotation.setStringValue(this.posFeature, posTag);

        if (posProbabilities != null) {
          // The probability belongs in the probability feature; writing it to the pos feature
          // would target the String-valued tag feature that was just set above.
          tokenAnnotation.setDoubleValue(this.probabilityFeature, posProbabilities[index]);
        }

        index++;
      }

      // log tokens with pos
      if (this.logger.isLoggable(Level.FINER)) {

        final StringBuilder sentenceWithPos = new StringBuilder();

        sentenceWithPos.append("\"");

        for (final AnnotationFS token : sentenceTokenAnnotationList) {
          sentenceWithPos.append(token.getCoveredText());
          sentenceWithPos.append('\\');
          sentenceWithPos.append(token.getStringValue(this.posFeature));
          sentenceWithPos.append(' ');
        }

        // delete last whitespace; compare against 1, not 0, because the opening " is already
        // in the buffer
        if (sentenceWithPos.length() > 1) {
          sentenceWithPos.setLength(sentenceWithPos.length() - 1);
        }

        sentenceWithPos.append("\"");

        this.logger.log(Level.FINER, sentenceWithPos.toString());
      }
    }
  }

예제 #11

0

파일 보기

파일: Mapper.java 프로젝트: sanju2010/ttc-project

 /**
  * Creates an annotation of the feature's domain type over the given span, stores {@code value}
  * in the feature, and adds the annotation to the indexes.
  *
  * @param cas the JCas to create the annotation in
  * @param feature the string feature to set; its domain determines the annotation type
  * @param begin the begin offset of the annotation
  * @param end the end offset of the annotation
  * @param value the value stored in the feature
  */
 private void create(JCas cas, Feature feature, int begin, int end, String value) {
   Type domainType = feature.getDomain();
   AnnotationFS created = cas.getCas().createAnnotation(domainType, begin, end);
   created.setStringValue(feature, value);
   cas.addFsToIndexes(created);
 }

예제 #12

0

파일 보기

파일: Offsets.java 프로젝트: CLLKazan/UIMA-Ext

 /**
  * Tells whether the given annotation has exactly these begin and end offsets.
  *
  * @param anno the annotation to compare with
  * @return {@code true} if both the begin and the end offset match
  */
 public boolean isIdenticalWith(AnnotationFS anno) {
   if (anno.getBegin() != begin) {
     return false;
   }
   return anno.getEnd() == end;
 }

예제 #13

0

파일 보기

파일: Offsets.java 프로젝트: CLLKazan/UIMA-Ext

 /**
  * Creates offsets from the begin and end positions of the given annotation by delegating to the
  * two-argument constructor.
  *
  * @param anno the annotation whose {@code getBegin()} and {@code getEnd()} values are used
  */
 public Offsets(AnnotationFS anno) {
   this(anno.getBegin(), anno.getEnd());
 }

예제 #14

0

파일 보기

파일: SuggestionBuilder.java 프로젝트: aakashysharma/webanno

  /**
   * Rebuilds {@code crossSentenceLists}: for every segment start in {@code segmentBeginEnd}, the
   * sentence number of that segment is mapped to the set of sentence numbers reached by
   * annotations that start or end inside the segment but have their other end outside of it.
   *
   * <p>As a side effect, the end stored in {@code segmentBeginEnd} for a segment is moved to the
   * end of the sentence returned by {@code getSentenceByAnnoEnd} whenever an annotation begins at
   * or before the stored end and ends after it.
   *
   * @param segmentBeginEnd segment begin offsets mapped to segment end offsets; values may be
   *     updated by this method
   * @param jCases the CASes to inspect (only the map values are used)
   * @param entryTypes the annotation types to inspect
   */
  private void updateCrossSentAnnoList(
      Map<Integer, Integer> segmentBeginEnd, Map<String, JCas> jCases, List<Type> entryTypes) {
    crossSentenceLists = new HashMap<>();
    // NOTE(review): the loop variable "begin" shadows the member accessed as this.begin below.
    for (Integer begin : segmentBeginEnd.keySet()) {
      // Sentence number of this segment; resolved once, from the first CAS visited.
      int thisSent = -1;
      Set<Integer> crossSents = new HashSet<>();
      for (Type t : entryTypes) {
        for (JCas c : jCases.values()) {
          if (thisSent == -1) {
            thisSent = BratAjaxCasUtil.getSentenceNumber(c, begin);
          }
          // update cross-sentence annotation lists
          // NOTE(review): this selection uses this.begin (the member), not the loop variable, so
          // the same range is scanned for every segment - confirm this is intended.
          for (AnnotationFS fs : selectCovered(c.getCas(), t, this.begin, end)) {
            // CASE 1. annotation begins here
            if (fs.getBegin() >= begin && fs.getBegin() <= segmentBeginEnd.get(begin)) {
              if (fs.getEnd() > segmentBeginEnd.get(begin) || fs.getEnd() < begin) {
                Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd());
                int thatSent = BratAjaxCasUtil.getSentenceNumber(c, s.getBegin());
                crossSents.add(thatSent);
              }
            }
            // CASE 2. Annotation ends here
            else if (fs.getEnd() >= begin && fs.getEnd() <= segmentBeginEnd.get(begin)) {
              if (fs.getBegin() > segmentBeginEnd.get(begin) || fs.getBegin() < begin) {
                int thatSent = BratAjaxCasUtil.getSentenceNumber(c, fs.getBegin());
                crossSents.add(thatSent);
              }
            }
          }

          // Extend the segment end when an annotation crosses it. Unqualified "begin" here is
          // the loop variable, unlike the this.begin used in the selection above.
          // NOTE(review): put() replaces the value of an existing key while the key set is being
          // iterated; later get(begin) calls see the updated end.
          for (AnnotationFS fs : selectCovered(c.getCas(), t, begin, end)) {
            if (fs.getBegin() <= segmentBeginEnd.get(begin)
                && fs.getEnd() > segmentBeginEnd.get(begin)) {
              Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd());
              segmentBeginEnd.put(begin, s.getEnd());
            }
          }
        }
      }
      // thisSent is still -1 here if entryTypes or jCases was empty.
      crossSentenceLists.put(thisSent, crossSents);
    }
  }

예제 #15

0

파일 보기

파일: TempEval2007Writer.java 프로젝트: alainloisel/cleartk

  /**
   * Serializes the document text of a CAS as inline XML, wrapping annotated spans in the elements
   * produced by the given converter. Copied and modified from
   * {@link org.apache.uima.util.CasToInlineXml}.
   *
   * <p>Annotations are visited in index order. An annotation that crosses the end of the
   * annotation currently open is skipped. Annotations whose offsets fall outside the document
   * text are reported on {@code System.err}.
   *
   * @param cas the CAS providing the document text and the annotation index
   * @param converter writes the root element and the start/end elements for annotations
   * @return the XML document as a string
   * @throws SAXException if writing to the content handler fails
   */
  private static String toXML(CAS cas, AnnotationsToElements converter) throws SAXException {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    XMLSerializer sax2xml = new XMLSerializer(byteArrayOutputStream, false);

    // get document text
    String docText = cas.getDocumentText();
    char[] docCharArray = docText.toCharArray();

    // get iterator over annotations sorted by increasing start position and
    // decreasing end position
    FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex().iterator();

    // This is basically a recursive algorithm that has had the recursion
    // removed through the use of an explicit Stack. We iterate over the
    // annotations, and if an annotation contains other annotations, we
    // push the parent annotation on the stack, process the children, and
    // then come back to the parent later.
    List<AnnotationFS> stack = new ArrayList<AnnotationFS>();
    int pos = 0;

    ContentHandler handler = sax2xml.getContentHandler();
    handler.startDocument();
    // write the start tag
    converter.startRootElement(handler);
    // null is the placeholder for the artificial Document annotation; it ends up at the bottom
    // of the stack and marks the point where all real annotations have been closed
    AnnotationFS curAnnot = null;

    while (iterator.isValid()) {
      AnnotationFS nextAnnot = iterator.get();

      if (curAnnot == null || nextAnnot.getBegin() < curAnnot.getEnd()) {
        // nextAnnot's start point is within the span of curAnnot
        if (curAnnot == null || nextAnnot.getEnd() <= curAnnot.getEnd()) { // crossover span check
          // nextAnnot is contained within curAnnot

          // write text between current pos and beginning of nextAnnot
          try {
            handler.characters(docCharArray, pos, nextAnnot.getBegin() - pos);
            pos = nextAnnot.getBegin();
            converter.startAnnotationElement(nextAnnot, handler);

            // push parent annotation on stack
            stack.add(curAnnot);
            // move on to next annotation
            curAnnot = nextAnnot;
          } catch (StringIndexOutOfBoundsException e) {
            System.err.println(
                "Invalid annotation range: "
                    + nextAnnot.getBegin()
                    + ","
                    + nextAnnot.getEnd()
                    + " in document of length "
                    + docText.length());
          }
        }
        iterator.moveToNext();
      } else {
        // nextAnnot begins after curAnnot ends: close curAnnot and return to its parent
        pos = writeTextAndCloseAnnotation(curAnnot, docCharArray, pos, handler, converter);

        // pop next containing annotation off stack
        curAnnot = stack.remove(stack.size() - 1);
      }
    }

    // finished writing all start tags, now close whatever is still open, innermost first,
    // until the null placeholder (or an empty stack) is reached
    while (curAnnot != null) {
      pos = writeTextAndCloseAnnotation(curAnnot, docCharArray, pos, handler, converter);
      curAnnot = stack.isEmpty() ? null : stack.remove(stack.size() - 1);
    }

    if (pos < docCharArray.length) {
      handler.characters(docCharArray, pos, docCharArray.length - pos);
    }
    converter.endRootElement(handler);
    handler.endDocument();

    // Decode with an explicit charset instead of the platform default.
    // NOTE(review): assumes the serializer writes UTF-8 (no other encoding is configured here).
    return new String(
        byteArrayOutputStream.toByteArray(), java.nio.charset.Charset.forName("UTF-8"));
  }

  /**
   * Writes the document text from {@code pos} up to the end of {@code annot} and then emits the
   * end element for {@code annot}.
   *
   * @return the new write position: the end of {@code annot}, or {@code pos} unchanged if the
   *     text could not be written because the range was invalid
   */
  private static int writeTextAndCloseAnnotation(
      AnnotationFS annot,
      char[] docCharArray,
      int pos,
      ContentHandler handler,
      AnnotationsToElements converter)
      throws SAXException {
    int newPos = pos;
    try {
      handler.characters(docCharArray, pos, annot.getEnd() - pos);
      newPos = annot.getEnd();
    } catch (StringIndexOutOfBoundsException e) {
      System.err.println(
          "Invalid annotation range: "
              + annot.getBegin()
              + ","
              + annot.getEnd()
              + " in document of length "
              + docCharArray.length);
    }
    converter.endAnnotationElement(annot, handler);
    return newPos;
  }

예제 #16

0

파일 보기

파일: HtmlConverterXmlTest.java 프로젝트: renaud/ruta-core

  /**
   * Runs the HtmlAnnotator and the HtmlConverter (with expanded offsets enabled) over a small XML
   * snippet and checks the converted plain text, the number of TAG annotations in the converted
   * view, their covered text, and the {@code expandedOffsets} feature.
   */
  @Test
  public void testExpandOffsets() throws Exception {
    String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Descriptors are looked up at the classpath root first, then under the engine package.
    ClassLoader loader = HtmlAnnotator.class.getClassLoader();
    URL urlA = loader.getResource("HtmlAnnotator.xml");
    if (urlA == null) {
      urlA = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }
    URL urlC = loader.getResource("HtmlConverter.xml");
    if (urlC == null) {
      urlC = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    ResourceSpecifier specifierA =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(urlA));
    AnalysisEngine aeA = UIMAFramework.produceAnalysisEngine(specifierA);
    aeA.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    aeA.reconfigure();

    ResourceSpecifier specifierC =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(urlC));
    AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(specifierC);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true);
    aeC.reconfigure();

    CAS cas = aeA.newCAS();
    Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");
    Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets");

    cas.setDocumentText(html);
    aeA.process(cas);
    aeC.process(cas);

    CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("Some contentMore content.", plainTextCas.getDocumentText());

    AnnotationIndex<AnnotationFS> tagIndex = plainTextCas.getAnnotationIndex(tagType);
    FSIterator<AnnotationFS> tags = tagIndex.iterator();
    assertEquals(4, tagIndex.size());

    AnnotationFS tag = tags.next();
    assertEquals(false, tag.getBooleanValue(expandedFeature));
    assertEquals("Some contentMore content.", tag.getCoveredText());

    tag = tags.next();
    assertEquals(false, tag.getBooleanValue(expandedFeature));
    assertEquals("Some content", tag.getCoveredText());

    tag = tags.next();
    boolean firstExpanded = tag.getBooleanValue(expandedFeature);
    assertEquals("More content.", tag.getCoveredText());

    tag = tags.next();
    boolean secondExpanded = tag.getBooleanValue(expandedFeature);
    assertEquals("More content.", tag.getCoveredText());

    // for one of these two annotations (with same offsets) the feature must be set to true
    assertEquals(true, firstExpanded || secondExpanded);

    cas.release();
  }

예제 #17

0

파일 보기

파일: ChainAdapter.java 프로젝트: sholschneider/webanno

 /**
  * Returns the value of the "next" feature (named by {@code linkNextFeatureName}) of the given
  * link, i.e. the link that follows it.
  */
 private AnnotationFS getNextLink(AnnotationFS aLink) {
   Feature nextFeature = aLink.getType().getFeatureByBaseName(linkNextFeatureName);
   return (AnnotationFS) aLink.getFeatureValue(nextFeature);
 }

예제 #18

0

파일 보기

파일: ChainAdapter.java 프로젝트: sholschneider/webanno

 /**
  * Stores {@code aNext} in the "next" feature (named by {@code linkNextFeatureName}) of the
  * given link, making it the link that follows.
  */
 private void setNextLink(AnnotationFS aLink, AnnotationFS aNext) {
   Feature nextFeature = aLink.getType().getFeatureByBaseName(linkNextFeatureName);
   aLink.setFeatureValue(nextFeature, aNext);
 }

예제 #19

0

파일 보기

파일: ChainAdapter.java 프로젝트: sholschneider/webanno

  /**
   * Add annotations from the CAS, which is controlled by the window size, to the brat response
   * {@link GetDocumentResponse}.
   *
   * <p>Each chain is walked link by link. Links are added to the response as entities, and every
   * rendered link is connected to the previously rendered link of the same chain by a relation.
   * All entities and relations of one chain share a color taken from
   * {@code ColoringStrategy.PALETTE_NORMAL_FILTERED}.
   *
   * @param aJcas The JCAS object containing annotations
   * @param aFeatures the features of this layer; searched for the coreference type (span label)
   *     and coreference relation (arc label) features
   * @param aResponse A brat response containing annotations in brat protocol
   * @param aBratAnnotatorModel Data model for brat annotations
   * @param aColoringStrategy the coloring strategy to render this layer (ignored)
   */
  @Override
  public void render(
      JCas aJcas,
      List<AnnotationFeature> aFeatures,
      GetDocumentResponse aResponse,
      BratAnnotatorModel aBratAnnotatorModel,
      ColoringStrategy aColoringStrategy) {
    // Get begin and end offsets of window content
    int windowBegin =
        BratAjaxCasUtil.selectByAddr(
                aJcas, Sentence.class, aBratAnnotatorModel.getSentenceAddress())
            .getBegin();
    int windowEnd =
        BratAjaxCasUtil.selectByAddr(
                aJcas,
                Sentence.class,
                BratAjaxCasUtil.getLastSentenceAddressInDisplayWindow(
                    aJcas,
                    aBratAnnotatorModel.getSentenceAddress(),
                    aBratAnnotatorModel.getPreferences().getWindowSize()))
            .getEnd();

    // Find the features for the arc and span labels - it is possible that we do not find a
    // feature for arc/span labels because they may have been disabled.
    AnnotationFeature spanLabelFeature = null;
    AnnotationFeature arcLabelFeature = null;
    for (AnnotationFeature f : aFeatures) {
      if (WebAnnoConst.COREFERENCE_TYPE_FEATURE.equals(f.getName())) {
        spanLabelFeature = f;
      }
      if (WebAnnoConst.COREFERENCE_RELATION_FEATURE.equals(f.getName())) {
        arcLabelFeature = f;
      }
    }
    // At this point arc and span feature labels must have been found! If not, the later code
    // will crash.
    // NOTE(review): both features are null-checked before use further down, so the claim above
    // may be outdated - confirm.

    Type chainType = getAnnotationType(aJcas.getCas());
    Feature chainFirst = chainType.getFeatureByBaseName(chainFirstFeatureName);

    int colorIndex = 0;
    // Iterate over the chains
    for (FeatureStructure chainFs : selectFS(aJcas.getCas(), chainType)) {
      AnnotationFS linkFs = (AnnotationFS) chainFs.getFeatureValue(chainFirst);
      AnnotationFS prevLinkFs = null;

      // Every chain is supposed to have a different color
      String color =
          ColoringStrategy.PALETTE_NORMAL_FILTERED[
              colorIndex % ColoringStrategy.PALETTE_NORMAL_FILTERED.length];
      // The color index is updated even for chains that have no visible links in the current
      // window because we would like the chain color to be independent of visibility. In
      // particular the color of a chain should not change when switching pages/scrolling.
      colorIndex++;

      // Iterate over the links of the chain
      while (linkFs != null) {
        Feature linkNext = linkFs.getType().getFeatureByBaseName(linkNextFeatureName);
        AnnotationFS nextLinkFs = (AnnotationFS) linkFs.getFeatureValue(linkNext);

        // Is link after window? If yes, we can skip the rest of the chain
        if (linkFs.getBegin() >= windowEnd) {
          break; // Go to next chain
        }

        // Is link before window? We only need links that begin within the window and that
        // end within the window
        // NOTE(review): the "!" binds only to the first parenthesised comparison, so this skips
        // links with begin < windowBegin && end <= windowEnd. The wording above suggests
        // !(begin >= windowBegin && end <= windowEnd) may have been intended - confirm.
        if (!(linkFs.getBegin() >= windowBegin) && (linkFs.getEnd() <= windowEnd)) {
          // prevLinkFs remains null until we enter the window
          linkFs = nextLinkFs;
          continue; // Go to next link
        }

        String bratTypeName = TypeUtil.getBratTypeName(this);

        // Render span; offsets are made relative to the window begin
        {
          String bratLabelText =
              TypeUtil.getBratLabelText(
                  this,
                  linkFs,
                  (spanLabelFeature != null) ? asList(spanLabelFeature) : Collections.EMPTY_LIST);
          Offsets offsets =
              new Offsets(linkFs.getBegin() - windowBegin, linkFs.getEnd() - windowBegin);

          aResponse.addEntity(
              new Entity(
                  BratAjaxCasUtil.getAddr(linkFs), bratTypeName, offsets, bratLabelText, color));
        }

        // Render arc (we do this on prevLinkFs because then we easily know that the current
        // and last link are within the window ;)
        if (prevLinkFs != null) {
          String bratLabelText = null;

          if (linkedListBehavior && arcLabelFeature != null) {
            // Render arc label
            bratLabelText = TypeUtil.getBratLabelText(this, prevLinkFs, asList(arcLabelFeature));
          } else {
            // Render only chain type
            bratLabelText = TypeUtil.getBratLabelText(this, prevLinkFs, Collections.EMPTY_LIST);
          }

          List<Argument> argumentList =
              asList(
                  new Argument("Arg1", BratAjaxCasUtil.getAddr(prevLinkFs)),
                  new Argument("Arg2", BratAjaxCasUtil.getAddr(linkFs)));

          // The relation is identified by the address of the previous link
          aResponse.addRelation(
              new Relation(
                  BratAjaxCasUtil.getAddr(prevLinkFs),
                  bratTypeName,
                  argumentList,
                  bratLabelText,
                  color));
        }

        // Disabled loop detection:
        //                if (BratAjaxCasUtil.isSame(linkFs, nextLinkFs)) {
        //                    log.error("Loop in CAS detected, aborting rendering of chains");
        //                    break;
        //                }

        prevLinkFs = linkFs;
        linkFs = nextLinkFs;
      }
    }
  }