Пример #1
0
 /**
  * Maps every annotation of the source type to a target value through the configured mapping.
  *
  * <p>The lookup key is the covered text when no source feature is configured, otherwise the
  * string value of that feature. Annotations whose key is {@code null}, or whose key has no entry
  * in the mapping, are skipped. For the others the mapped value is either written to the target
  * feature of the same annotation (when {@code update()} is true) or passed to {@code create}
  * together with the span of the annotation.
  *
  * @param cas the CAS whose annotations are processed
  * @throws AnalysisEngineProcessException declared by the overridden method
  */
 @Override
 public void process(JCas cas) throws AnalysisEngineProcessException {
   this.setSource(cas);
   this.setTarget(cas);
   AnnotationIndex<Annotation> sourceIndex = cas.getAnnotationIndex(this.getSourceType());
   FSIterator<Annotation> cursor = sourceIndex.iterator();
   while (cursor.hasNext()) {
     Annotation current = cursor.next();
     // Key on the covered text unless a dedicated source feature is configured
     String key =
         this.getSourceFeature() == null
             ? current.getCoveredText()
             : current.getStringValue(this.getSourceFeature());
     if (key == null) {
       continue;
     }
     String mapped = this.getMapping().get(key);
     if (mapped == null) {
       continue;
     }
     if (this.update().booleanValue()) {
       this.update(cas, current, this.getTargetFeature(), mapped);
     } else {
       this.create(cas, this.getTargetFeature(), current.getBegin(), current.getEnd(), mapped);
     }
   }
 }
 /**
  * {@inheritDoc}
  *
  * <p>Prints an "Exact" row when there is exactly one gold and one system annotation and both
  * share the same begin and end offsets; otherwise prints a "Partial" row listing the texts and
  * offsets of all annotations on each side.
  */
 @Override
 public void onMatching(
     String docUri, Type type, SortedSet<Annotation> goldAnnos, SortedSet<Annotation> sysAnnos) {
   if (goldAnnos.size() == 1 && sysAnnos.size() == 1) {
     Annotation gold = goldAnnos.iterator().next();
     Annotation sys = sysAnnos.iterator().next();
     boolean sameSpan = gold.getBegin() == sys.getBegin() && gold.getEnd() == sys.getEnd();
     if (sameSpan) {
       printRow(
           type.getShortName(),
           "Exact",
           gold.getCoveredText(),
           String.valueOf(gold.getBegin()),
           sys.getCoveredText(),
           String.valueOf(sys.getBegin()),
           docUri);
       return;
     }
   }
   // Anything that is not a one-to-one match with identical spans is reported as partial
   Joiner textJoiner = Joiner.on(" /// ");
   Joiner offsetJoiner = Joiner.on(", ");
   printRow(
       type.getShortName(),
       "Partial",
       textJoiner.join(transform(goldAnnos, annoToTxt)),
       offsetJoiner.join(transform(goldAnnos, annoToOffset)),
       textJoiner.join(transform(sysAnnos, annoToTxt)),
       offsetJoiner.join(transform(sysAnnos, annoToOffset)),
       docUri);
 }
Пример #3
0
 /**
  * Prints to standard output the total number of annotations found in the given JCas, followed by
  * one line per annotation type name with its number of occurrences, in the key order of the
  * underlying {@link TreeMap}.
  *
  * <p>When reading an annotation raises a {@link NullPointerException}, the iterator is advanced
  * manually and the occurrence is counted under the canonical name of the exception class.
  *
  * @param jcas the JCas whose annotation index is summarized
  */
 public static void showJCas(JCas jcas) {
   Map<String, MutableInt> countsByType = new TreeMap<String, MutableInt>();
   FSIterator<Annotation> cursor = jcas.getAnnotationIndex().iterator();
   int seen = 0;
   while (cursor.hasNext()) {
     seen++;
     String typeName;
     try {
       typeName = cursor.next().getType().getName();
     } catch (NullPointerException e) {
       // Skip the faulty entry by hand and count it under the exception class name
       cursor.moveToNext();
       typeName = e.getClass().getCanonicalName();
     }
     MutableInt counter = countsByType.get(typeName);
     if (counter == null) {
       countsByType.put(typeName, new MutableInt(1));
     } else {
       counter.increment();
     }
   }
   System.out.println(
       "Total annotation in JCas (ID: " + System.identityHashCode(jcas) + "): " + seen);
   for (Map.Entry<String, MutableInt> entry : countsByType.entrySet()) {
     System.out.println(entry.getKey() + ": " + entry.getValue());
   }
 }
 /**
  * {@inheritDoc}
  *
  * <p>Prints a "Spurious" row; the gold text and gold offset arguments are passed as {@code null}.
  */
 @Override
 public void onSpurious(String docUri, Type type, Annotation sysAnno) {
   String typeName = type.getShortName();
   String sysText = sysAnno.getCoveredText();
   String sysOffset = String.valueOf(sysAnno.getBegin());
   printRow(typeName, "Spurious", null, null, sysText, sysOffset, docUri);
 }
 /**
  * {@inheritDoc}
  *
  * <p>Prints a "Missing" row; the system text and system offset arguments are passed as
  * {@code null}.
  */
 @Override
 public void onMissing(String docUri, Type type, Annotation goldAnno) {
   String typeName = type.getShortName();
   String goldText = goldAnno.getCoveredText();
   String goldOffset = String.valueOf(goldAnno.getBegin());
   printRow(typeName, "Missing", goldText, goldOffset, null, null, docUri);
 }
 /**
  * Creates parentheticals either over the whole document text or, when a window class is
  * configured, separately inside each window annotation of that class.
  *
  * @param jCas the CAS to process
  * @throws AnalysisEngineProcessException propagated from {@code createParentheticals}
  */
 @Override
 public void process(JCas jCas) throws AnalysisEngineProcessException {
   if (windowClass == null) {
     // No window configured: scan the full document, offsets start at zero
     createParentheticals(jCas, jCas.getDocumentText(), 0);
     return;
   }
   for (Annotation window : JCasUtil.select(jCas, windowClass)) {
     // The window begin is passed as the offset of the scanned text
     createParentheticals(jCas, window.getCoveredText(), window.getBegin());
   }
 }
Пример #7
0
  /**
   * Runs every rule automaton of the resource over the iterated annotations of the CAS.
   *
   * <p>Steps: call {@code beforeRuleProcessing}; unless the label feature equals
   * {@code NO_SET_LABEL}, set that feature on every iterated annotation from the resource's
   * matching label string; register a recognition handler that forwards each recognized episode
   * to {@code ruleMatched}; feed every annotation to every automaton, calling
   * {@code allRulesFailed} whenever no automaton has a current instance left; finish all
   * automata; unregister the handler; call {@code afterRuleProcessing}.
   *
   * <p>The handler is unregistered in a {@code finally} block so that a failure while feeding or
   * finishing the automata does not leave it attached to the rule automata.
   *
   * @param jCas the CAS to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(final JCas jCas) throws AnalysisEngineProcessException {
    beforeRuleProcessing(jCas);

    if (!this.labelFeature.equals(NO_SET_LABEL)) {
      // Must set labels
      FSIterator<Annotation> labelIt = jCas.getAnnotationIndex(getIteratedType(jCas)).iterator();
      Feature feat = this.getIteratedType(jCas).getFeatureByBaseName(this.labelFeature);
      while (labelIt.hasNext()) {
        Annotation word = labelIt.next();
        word.setStringValue(feat, this.resource.getMatchingLabelString(word));
      }
    }

    RecognitionHandler recognitionHandler =
        new RecognitionHandler() {
          @Override
          public void recognizedEpisode(RegexOccurrence episode) {
            ruleMatched(jCas, episode);
          }
        };
    for (final Rule rule : this.resource.getRules()) {
      rule.getAutomaton().setAllowOverlappingInstances(this.allowOverlappingOccurrences);
      rule.getAutomaton().addRecognitionHandler(recognitionHandler);
      rule.getAutomaton().reset();
    }

    try {
      FSIterator<Annotation> it = jCas.getAnnotationIndex(getIteratedType(jCas)).iterator();
      while (it.hasNext()) {
        Annotation word = it.next();
        boolean allRulesFailed = true;
        for (Rule rule : this.resource.getRules()) {
          rule.getAutomaton().nextAnnotation(word);
          allRulesFailed &= rule.getAutomaton().currentInstancesNum() == 0;
        }
        if (allRulesFailed) {
          allRulesFailed(jCas);
        }
      }

      for (Rule rule : this.resource.getRules()) {
        rule.getAutomaton().finish();
      }
    } finally {
      // Always detach the handler: it captures this jCas and must not outlive this call
      for (final Rule rule : this.resource.getRules()) {
        rule.getAutomaton().removeRecognitionHandler(recognitionHandler);
      }
    }

    afterRuleProcessing(jCas);
  }
 /**
  * Scans the text and creates one annotation per matched pair of left/right parenthesis
  * characters, nested pairs included. Right parentheses without a pending left one are ignored.
  *
  * @param jCas the CAS the annotations are created in and indexed to
  * @param text the text to scan
  * @param offset value added to the positions found in {@code text} to obtain the annotation span
  * @throws AnalysisEngineProcessException if the annotation cannot be instantiated
  */
 private void createParentheticals(JCas jCas, String text, int offset)
     throws AnalysisEngineProcessException {
   // Positions of the left parentheses that have not been closed yet
   Stack<Integer> openPositions = new Stack<Integer>();
   final int length = text.length();
   for (int pos = 0; pos < length; pos++) {
     char current = text.charAt(pos);
     if (current == leftParen) {
       openPositions.push(pos);
     }
     if (current != rightParen || openPositions.isEmpty()) {
       continue;
     }
     // The span runs from the matching left parenthesis up to and including the right one
     int begin = offset + openPositions.pop();
     int end = offset + pos + 1;
     Annotation parenthetical;
     try {
       parenthetical = parentheticalConstructor.newInstance(jCas, begin, end);
     } catch (Exception e) {
       throw new AnalysisEngineProcessException(e);
     }
     parenthetical.addToIndexes();
   }
 }
  /**
   * Creates an entity annotation over every token whose POS tag equals {@code NP} or {@code NPS}.
   *
   * <p>The entity feature receives the covered text of the token; the name feature is
   * "ORGANIZATION" when that text is "Apache" and "OTHER" otherwise.
   *
   * @param jcas the CAS to process
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    Type type = jcas.getCas().getTypeSystem().getType(TYPE_NAME);
    Feature entityFeature = type.getFeatureByBaseName(ENTITY_FEATURE);
    Feature nameFeature = type.getFeatureByBaseName(NAME_FEATURE);

    for (Annotation annotation : jcas.getAnnotationIndex(TokenAnnotation.type)) {
      String tokenPOS = ((TokenAnnotation) annotation).getPosTag();
      if (!NP.equals(tokenPOS) && !NPS.equals(tokenPOS)) {
        continue;
      }

      AnnotationFS entityAnnotation =
          jcas.getCas().createAnnotation(type, annotation.getBegin(), annotation.getEnd());

      String coveredText = annotation.getCoveredText();
      entityAnnotation.setStringValue(entityFeature, coveredText);

      // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
      String name = coveredText.equals("Apache") ? "ORGANIZATION" : "OTHER";
      entityAnnotation.setStringValue(nameFeature, name);

      jcas.addFsToIndexes(entityAnnotation);
    }
  }
Пример #10
0
  /**
   * Outputs trigrams for an input annotation.
   *
   * <p>Walks the tokens returned by the sub-iterator of the annotation while remembering the two
   * previously visited tokens. Whenever both of them pass {@code tokenIsInAnnotation}, an
   * {@code NGram} spanning the three tokens is created and added to the indexes.
   *
   * @param annotation the annotation whose tokens are combined into trigrams
   * @param aJCas the CAS holding the tokens and receiving the n-grams
   */
  public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {
    AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);
    Iterator<Annotation> tokenIt = tokenIndex.subiterator(annotation);

    // Sliding window over the last three tokens: first <- second <- third
    Annotation first = null;
    Annotation second = null;
    Annotation third = null;

    while (tokenIt.hasNext()) {
      first = second;
      second = third;
      third = tokenIt.next();

      if (!tokenIsInAnnotation(annotation, second) || !tokenIsInAnnotation(annotation, first)) {
        continue;
      }

      NGram trigram = new NGram(aJCas);
      FSArray elements = new FSArray(aJCas, 3);
      elements.set(0, first);
      elements.set(1, second);
      elements.set(2, third);

      trigram.setBegin(first.getBegin());
      trigram.setEnd(third.getEnd());

      trigram.setElements(elements);
      trigram.setElementType("edu.cmu.deiis.types.Token");

      trigram.setConfidence(1D);
      trigram.setCasProcessorId(PROCESSOR_ID);
      trigram.addToIndexes();
    }
  }
Пример #11
0
 /** Removes every annotation returned by {@code getAnnotations()} from the indexes. */
 private void remove() {
   for (Annotation indexed : this.getAnnotations()) {
     indexed.removeFromIndexes();
   }
 }
 /**
  * Shifts the begin and end of the given annotation by {@code offset}, in place. A {@code null}
  * annotation is ignored.
  *
  * @param annotation the annotation to shift, may be {@code null}
  * @param offset the offset as computed by (copy - original)
  */
 private static void updateAnnotation(Annotation annotation, int offset) {
   if (annotation != null) {
     annotation.setBegin(offset + annotation.getBegin());
     annotation.setEnd(offset + annotation.getEnd());
   }
 }
Пример #13
0
 /** Returns the begin offset of the given annotation as a boxed integer. */
 @Override
 public Integer apply(Annotation input) {
   return Integer.valueOf(input.getBegin());
 }
Пример #14
0
 /** Returns the text covered by the given annotation. */
 @Override
 public String apply(Annotation input) {
   final String coveredText = input.getCoveredText();
   return coveredText;
 }
Пример #15
0
 /**
  * Writes the given string value into the given feature of the annotation.
  *
  * @param cas not used by this method
  * @param annotation the annotation to modify
  * @param feature the feature to set
  * @param value the string value to store
  */
 private void update(JCas cas, Annotation annotation, Feature feature, String value) {
   annotation.setStringValue(feature, value);
 }
Пример #16
0
 /**
  * Tells whether two annotations cover exactly the same span.
  *
  * @param anno1 the first annotation
  * @param anno2 the second annotation
  * @return {@code true} if both the begin and the end offsets are equal
  */
 public static boolean sameIndexes(Annotation anno1, Annotation anno2) {
   if (anno1.getBegin() != anno2.getBegin()) {
     return false;
   }
   return anno1.getEnd() == anno2.getEnd();
 }
Пример #17
0
 /**
  * Tells whether the span of {@code subAnnotation} lies within the span of {@code container}
  * (identical boundaries count as contained).
  *
  * @param container the annotation expected to enclose the other one
  * @param subAnnotation the annotation expected to be enclosed
  * @return {@code true} if the container begins at or before, and ends at or after, the
  *     sub-annotation
  */
 public static boolean contains(Annotation container, Annotation subAnnotation) {
   if (container.getBegin() > subAnnotation.getBegin()) {
     return false;
   }
   return container.getEnd() >= subAnnotation.getEnd();
 }
Пример #18
0
  /**
   * Recursively converts a constituent or token annotation into a Stanford {@link Tree}.
   *
   * <p>A {@code Constituent} that is not a leaf (according to {@code isLeaf}) becomes an inner
   * node labelled with its constituent type, with one subtree per child. Any other annotation is
   * cast to {@code Token} and becomes a leaf holding the covered text, wrapped in a preterminal
   * node labelled with the value of the first {@code POS} annotation covered by the token. The
   * leaf label additionally records, as runs of spaces, the character gaps to the preceding and
   * to the following token.
   *
   * @param root the annotation to convert
   * @param tFact the factory used to create the leaves and nodes
   * @return the tree node created for {@code root}
   * @throws IllegalStateException if the JCas cannot be obtained from the CAS of {@code root};
   *     the underlying {@code CASException} is attached as the cause
   */
  public static Tree createStanfordTree(Annotation root, TreeFactory tFact) {
    JCas aJCas;
    try {
      aJCas = root.getCAS().getJCas();
    } catch (CASException e) {
      // Keep the original exception as the cause so the failure can be diagnosed
      throw new IllegalStateException("Unable to get JCas from JCas wrapper", e);
    }

    // define the new (root) node
    Tree rootNode;

    // before we can create a node, we must check if we have any children (we have to know
    // whether to create a node or a leaf - not very dynamic)
    if (root instanceof Constituent && !isLeaf((Constituent) root)) {
      Constituent node = (Constituent) root;
      List<Tree> childNodes = new ArrayList<Tree>();

      // get childNodes from child annotations
      FSArray children = node.getChildren();
      for (int i = 0; i < children.size(); i++) {
        childNodes.add(createStanfordTree(node.getChildren(i), tFact));
      }

      // now create the node with its children
      rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);

    } else {
      // Handle leaf annotations
      // Leaves are always Token annotations
      // We also have to insert a preterminal node with the value of the
      // POS annotation on the token
      // because the POS is not directly stored within the tree
      Token wordAnnotation = (Token) root;

      // create leaf-node for the tree
      Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());

      // create information about preceding and trailing whitespaces in the leaf node
      StringBuilder preWhitespaces = new StringBuilder();
      StringBuilder trailWhitespaces = new StringBuilder();

      List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
      List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

      if (precedingTokenList.size() > 0) {
        Token precedingToken = precedingTokenList.get(0);
        int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
        for (int i = 0; i < precedingWhitespaces; i++) {
          preWhitespaces.append(" ");
        }
      }
      if (followingTokenList.size() > 0) {
        Token followingToken = followingTokenList.get(0);
        int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
        for (int i = 0; i < trailingWhitespaces; i++) {
          trailWhitespaces.append(" ");
        }
      }

      // write whitespace information as CoreAnnotation.BeforeAnnotation and
      // CoreAnnotation.AfterAnnotation to the node label
      ((CoreLabel) wordNode.label())
          .set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
      ((CoreLabel) wordNode.label())
          .set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

      // get the POS annotation covered by the token
      List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation);
      // the token should cover exactly one POS
      assert coveredPos.size() == 1;
      POS pos = coveredPos.get(0);

      // create POS-Node in the tree and attach word-node to it
      rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] {wordNode})));
    }

    return rootNode;
  }
Пример #19
0
  /**
   * Recreates a Stanford tree from the parser annotations below {@code root} and stores every
   * other annotation covered by {@code root} in the label of its best-fitting tree node.
   *
   * <p><strong>CAUTION: </strong><i>This method is intended for use by CAS multipliers, which
   * create new CASes from this tree. The begin and end of the stored annotations are rewritten
   * relative to the span of their node, so the annotation spans in the source CAS are changed. Do
   * not use the source CAS after calling this method. Use {@code createStanfordTree()} instead if
   * the annotations do not have to be recovered or accessed in the tree.</i>
   *
   * <p>TODO: This behavior could be changed by making COPIES of the annotations and changing the
   * copies instead of the originals. However, making copies requires a dummy CAS to copy the
   * annotations to. When they are recovered, they will be copied to the new destination CAS
   * anyway.
   *
   * @param root the ROOT annotation
   * @return a {@link Tree} object representing the syntax structure of the sentence
   * @throws CASException if the JCas cannot be accessed.
   */
  public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException {
    JCas aJCas = root.getCAS().getJCas();

    // Build the plain syntax tree first
    Tree tree = createStanfordTree(root);

    // Split the covered annotations into tokens (needed for span calculations later on) and
    // annotations that are not parser related; other parser-related annotations are dropped
    List<Annotation> nonParserAnnotations = new ArrayList<Annotation>();
    List<Token> tokens = new ArrayList<Token>();

    // selectCovered is used instead of a subiterator, because subiterators did not work in all
    // cases
    for (Annotation covered : JCasUtil.selectCovered(aJCas, Annotation.class, root)) {
      if (covered instanceof Token) {
        tokens.add((Token) covered);
        continue;
      }
      boolean parserRelated =
          covered instanceof POS
              || covered instanceof Constituent
              || covered instanceof Dependency
              || covered instanceof PennTree
              || covered instanceof Lemma
              || covered instanceof DocumentMetaData;
      if (!parserRelated) {
        nonParserAnnotations.add(covered);
      }
    }

    // create wrapper for tree and its tokens
    TreeWithTokens annoTree = new TreeWithTokens(tree, tokens);

    // Attach each remaining annotation to the node returned by getBestFit
    for (Annotation attachable : nonParserAnnotations) {
      Tree bestFittingNode = annoTree.getBestFit(attachable);
      if (bestFittingNode == null) {
        continue;
      }

      // translate the annotation span to a value relative to the node span
      int nodeStart = annoTree.getSpan(bestFittingNode).getSource();
      attachable.setBegin(attachable.getBegin() - nodeStart);
      attachable.setEnd(attachable.getEnd() - nodeStart);

      // fetch the collection of UIMA annotations stored in the node label, creating it if it
      // does not exist yet
      CoreLabel nodeLabel = (CoreLabel) bestFittingNode.label();
      Collection<Annotation> attached = nodeLabel.get(UIMAAnnotations.class);
      if (attached == null) {
        attached = new ArrayList<Annotation>();
      }

      // add the annotation and write the collection back to the node label
      attached.add(attachable);
      nodeLabel.set(UIMAAnnotations.class, attached);
    }

    return tree;
  }
Пример #20
0
 /**
  * Prints to standard output (the console) the simple class name and the covered text of the
  * given annotation.
  *
  * @param anAnnotation the annotation to print
  */
 public static void echo(Annotation anAnnotation) {
   String typeName = anAnnotation.getClass().getSimpleName();
   String coveredText = anAnnotation.getCoveredText();
   System.out.format("type>%s<\t\tcoveredText>%s<\n", typeName, coveredText);
 }