Java JCasUtil.selectCovered Examples, org.uimafit.util.JCasUtil.selectCovered Java Examples

Example #1

0

Show file

File: MultilingualUtils.java Project: rohanramanath/semantics

  public static Pair<Integer, Integer> getAnnotationProjection(
      ComponentAnnotation sourceAnno,
      ArrayListMultimap<StanfordCorenlpToken, StanfordCorenlpToken> tokenAlignment) {
    // compute aligned start and end
    int earliestStart = -1;
    int latestEnd = -1;
    boolean isFirstToken = true;
    for (StanfordCorenlpToken token :
        JCasUtil.selectCovered(StanfordCorenlpToken.class, sourceAnno)) {
      for (StanfordCorenlpToken projectedToken : tokenAlignment.get(token)) {
        if (isFirstToken) {
          earliestStart = projectedToken.getBegin();
          latestEnd = projectedToken.getEnd();
        } else {
          if (earliestStart > projectedToken.getBegin()) {
            earliestStart = projectedToken.getBegin();
          }
          if (latestEnd < projectedToken.getEnd()) {
            latestEnd = projectedToken.getEnd();
          }
        }
        isFirstToken = false;
      }
    }

    return Pair.of(earliestStart, latestEnd);
  }

Example #2

0

Show file

File: SimpleOneLinePerDocWriter.java Project: BMKEG/skmTriage

  public void process(JCas jCas) throws AnalysisEngineProcessException {

    TriageScore doc = JCasUtil.selectSingle(jCas, TriageScore.class);
    String code = doc.getInOutCode();

    File outFile = new File(baseData.getPath() + "/" + code + ".txt");

    try {

      PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(outFile, true)));

      out.print(doc.getVpdmfId() + "	");

      for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        if (tokens.size() <= 0) {
          continue;
        }

        List<String> tokenStrings = JCasUtil.toText(tokens);
        for (int i = 0; i < tokens.size(); i++) {
          out.print(tokenStrings.get(i) + " ");
        }
      }

      out.print("\n");
      out.close();

    } catch (IOException e) {

      throw new AnalysisEngineProcessException(e);
    }
  }

Example #3

0

Show file

File: AnnotationSpanLemmasWriter.java Project: CLLKazan/EvEx

 @Override
 public void process(JCas jCas) throws AnalysisEngineProcessException {
   beforeProcess(jCas);
   try {
     for (AnnotationFS anno : JCasUtil.select(jCas, annotationType)) {
       List<String> lemmas = Lists.newLinkedList();
       for (Token token : JCasUtil.selectCovered(jCas, Token.class, anno)) {
         Word w = token2WordIdx.get(token);
         if (w == null) {
           lemmas.add(token.getCoveredText());
         } else {
           lemmas.add(MorphCasUtils.getOnlyWordform(w).getLemma());
         }
       }
       outEntries.add(lemmaJoiner.join(lemmas));
     }
   } finally {
     afterProcess(jCas);
   }
 }

Example #4

0

Show file

File: StanfordCoreNLPAnnotator.java Project: kuoliu/hw3-kuol

  @Override
  public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

      // create the token annotation
      int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
      int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
      String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
      String lemma = tokenAnn.get(LemmaAnnotation.class);
      Token token = new Token(jCas, begin, end);
      token.setPos(pos);
      token.setLemma(lemma);
      token.addToIndexes();

      // hackery to convert token-level named entity tag into phrase-level tag
      String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
      if (neTag.equals("O") && !lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
      } else {
        if (lastNETag.equals("O")) {
          lastNEBegin = begin;
        } else if (lastNETag.equals(neTag)) {
          // do nothing - begin was already set
        } else {
          NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
          ne.setMentionType(lastNETag);
          ne.addToIndexes();
          lastNEBegin = begin;
        }
        lastNEEnd = end;
      }
      lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
      NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
      ne.setMentionType(lastNETag);
      ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

      // add the sentence annotation
      int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
      int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
      Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
      sentence.addToIndexes();

      // add the syntactic tree annotation
      List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
      Tree tree = sentenceAnn.get(TreeAnnotation.class);
      if (tree.children().length != 1) {
        throw new RuntimeException("Expected single root node, found " + tree);
      }
      tree = tree.firstChild();
      tree.indexSpans(0);
      TopTreebankNode root = new TopTreebankNode(jCas);
      root.setTreebankParse(tree.toString());
      // TODO: root.setTerminals(v)
      this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

      // get the dependencies
      SemanticGraph dependencies =
          sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

      // convert Stanford nodes to UIMA annotations
      List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
      Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
      for (IndexedWord stanfordNode : dependencies.vertexSet()) {
        int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
        int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
        int tokenBegin = tokens.get(indexBegin).getBegin();
        int tokenEnd = tokens.get(indexEnd - 1).getEnd();
        DependencyNode node;
        if (dependencies.getRoots().contains(stanfordNode)) {
          node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
        } else {
          node = new DependencyNode(jCas, tokenBegin, tokenEnd);
        }
        stanfordToUima.put(stanfordNode, node);
      }

      // create relation annotations for each Stanford dependency
      ArrayListMultimap<DependencyNode, DependencyRelation> headRelations =
          ArrayListMultimap.create();
      ArrayListMultimap<DependencyNode, DependencyRelation> childRelations =
          ArrayListMultimap.create();
      for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
        DependencyRelation relation = new DependencyRelation(jCas);
        DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
        DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
        String relationType = stanfordEdge.getRelation().toString();
        if (head == null || child == null || relationType == null) {
          throw new RuntimeException(
              String.format(
                  "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n",
                  relation, child, head));
        }
        relation.setHead(head);
        relation.setChild(child);
        relation.setRelation(relationType);
        relation.addToIndexes();
        headRelations.put(child, relation);
        childRelations.put(head, relation);
      }

      // set the relations for each node annotation
      for (DependencyNode node : stanfordToUima.values()) {
        List<DependencyRelation> heads = headRelations.get(node);
        node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
        if (heads != null) {
          FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
        }
        List<DependencyRelation> children = childRelations.get(node);
        node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
        if (children != null) {
          FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
        }
        node.addToIndexes();
      }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
      spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
      sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
      List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
      for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

        // figure out the character span of the token
        List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
        int begin = tokens.get(corefMention.startIndex - 1).getBegin();
        int end = tokens.get(corefMention.endIndex - 2).getEnd();

        // use an existing named entity mention when possible; otherwise create a new one
        NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
        if (mention == null) {
          mention = new NamedEntityMention(jCas, begin, end);
          mention.addToIndexes();
        }
        mentions.add(mention);
      }

      // create an entity for the mentions
      Collections.sort(
          mentions,
          new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
              return m1.getBegin() - m2.getBegin();
            }
          });

      // create mentions and add them to entity
      NamedEntity entity = new NamedEntity(jCas);
      entity.setMentions(new FSArray(jCas, mentions.size()));
      int index = 0;
      for (NamedEntityMention mention : mentions) {
        mention.setMentionedEntity(entity);
        entity.setMentions(index, mention);
        index += 1;
      }
      entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
      if (mention.getMentionedEntity() == null) {
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, 1));
        entity.setMentions(0, mention);
        mention.setMentionedEntity(entity);
        entity.getMentions();
        entities.add(entity);
      }
    }

    // sort entities by document order
    Collections.sort(
        entities,
        new Comparator<NamedEntity>() {
          @Override
          public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
          }

          private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention :
                JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
              if (mention.getBegin() < min) {
                min = mention.getBegin();
              }
            }
            return min;
          }
        });

    // add entities to document
    for (NamedEntity entity : entities) {
      entity.addToIndexes();
    }
  }

Example #5

0

Show file

File: BIU_LAP_Test.java Project: brittazeller/Excitement-Open-Platform

  private void verifyToken(Token token, TestTokenInfo info) throws LAPVerificationException {
    if (!info.text.equals(token.getCoveredText()))
      throw new LAPVerificationException(
          "Bad token text for "
              + info.id
              + ":"
              + info.text
              + ", expected \""
              + info.text
              + "\", got \""
              + token.getCoveredText()
              + "\"");
    if (info.begin != token.getBegin())
      throw new LAPVerificationException(
          "Bad token begin index for "
              + info.id
              + ":"
              + info.text
              + ", expected "
              + info.begin
              + ", got "
              + token.getBegin());
    if (info.end != token.getEnd())
      throw new LAPVerificationException(
          "Bad token end index for "
              + info.id
              + ":"
              + info.text
              + ", expected "
              + info.end
              + ", got "
              + token.getEnd());
    if (!info.lemma.equals(token.getLemma().getValue()))
      throw new LAPVerificationException(
          "Bad token lemma for "
              + info.id
              + ":"
              + info.text
              + ", expected \""
              + info.lemma
              + "\", got \""
              + token.getLemma().getValue()
              + "\"");
    if (!info.posType.equals(token.getPos().getType().getShortName()))
      throw new LAPVerificationException(
          "Bad token POS type for "
              + info.id
              + ":"
              + info.text
              + ", expected "
              + info.posType
              + ", got "
              + token.getPos().getType().getShortName());
    if (!info.posValue.equals(token.getPos().getPosValue()))
      throw new LAPVerificationException(
          "Bad token POS value for "
              + info.id
              + ":"
              + info.text
              + ", expected \""
              + info.posValue
              + "\", got \""
              + token.getPos().getPosValue()
              + "\"");

    String nerType = null;
    List<NamedEntity> ners = JCasUtil.selectCovered(NamedEntity.class, token);
    if (ners.size() == 1) {
      nerType = ners.get(0).getType().getShortName();
    } else if (ners.size() > 1) {
      throw new LAPVerificationException(
          "Got more than one NER annotation for " + info.id + ":" + info.text + " - " + ners);
    }
    if (!Objects.equals(info.nerType, nerType))
      throw new LAPVerificationException(
          "Bad token NER value for "
              + info.id
              + ":"
              + info.text
              + ", expected \""
              + info.nerType
              + "\", got \""
              + nerType
              + "\"");

    Set<TestDependencyInfo> infoDependencies =
        new HashSet<TestDependencyInfo>(Arrays.asList(info.dependencies));
    if (!infoDependencies.equals(governors.get(token)))
      throw new LAPVerificationException(
          "Bad token dependencies for "
              + info.id
              + ":"
              + info.text
              + ", expected "
              + infoDependencies
              + ", got "
              + governors.get(token));

    System.out.println("Verified token: " + info);
  }