Ejemplo n.º 1
0
  /**
   * Walks the tree rooted at {@code aNode} depth-first and collects the IDs of all visited nodes
   * in {@code aConstituents}.
   *
   * <p>For a {@link Constituent} node, a new constituent annotation is created in {@code aView}
   * and its ID is recorded before the node's children are visited. For a {@link Token} node, only
   * the token ID is recorded; no annotation is created here.
   *
   * @param aView the view in which constituent annotations are created
   * @param aNode the current tree node; expected to be a {@link Constituent} or a {@link Token}
   * @param aConstituents receives the IDs of the visited nodes
   * @throws IllegalStateException if {@code aNode} is neither a {@link Constituent} nor a
   *     {@link Token}
   */
  private void convertConstituent(
      View aView, org.apache.uima.jcas.tcas.Annotation aNode, Set<String> aConstituents) {
    // Handle everything that is not an inner tree node first.
    if (!(aNode instanceof Constituent)) {
      if (aNode instanceof Token) {
        aConstituents.add(id(TOKEN, aNode));
        return;
      }
      throw new IllegalStateException("Unexpected node type: " + aNode);
    }

    Constituent parent = (Constituent) aNode;

    // LAPPS constituents inherit from Relation which has no offsets
    String parentId = id(CONSTITUENT, aNode);
    Annotation created = aView.newAnnotation(parentId, Discriminators.Uri.CONSTITUENT);
    aConstituents.add(created.getId());

    // Descend into the children after the parent has been recorded.
    for (org.apache.uima.jcas.tcas.Annotation childNode :
        select(parent.getChildren(), org.apache.uima.jcas.tcas.Annotation.class)) {
      convertConstituent(aView, childNode, aConstituents);
    }
  }
Ejemplo n.º 2
0
  /**
   * Converts the annotations of the given CAS into a LIF container with a single view and writes
   * the container out as pretty-printed JSON.
   *
   * <p>Annotations are added to the view in this order: paragraphs, sentences, tokens (with POS
   * and lemma features when present), named entities, one dependency structure per sentence that
   * has dependencies, and one phrase structure per constituent tree root.
   *
   * @param aJCas the CAS whose language, text and annotations are exported
   * @throws AnalysisEngineProcessException if obtaining the output stream, serializing the
   *     container, or writing the result fails
   */
  @Override
  public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Set up the LIF container with the document-level information.
    Container lif = new Container();
    lif.setLanguage(aJCas.getDocumentLanguage());
    lif.setText(aJCas.getDocumentText());

    // All converted annotations go into this one view.
    View lifView = lif.newView();

    // Paragraphs
    for (Paragraph paragraph : select(aJCas, Paragraph.class)) {
      String paragraphId = id(PARAGRAPH, paragraph);
      lifView.newAnnotation(
          paragraphId, Discriminators.Uri.PARAGRAPH, paragraph.getBegin(), paragraph.getEnd());
    }

    // Sentences
    for (Sentence sentence : select(aJCas, Sentence.class)) {
      String sentenceId = id(SENTENCE, sentence);
      lifView.newAnnotation(
          sentenceId, Discriminators.Uri.SENTENCE, sentence.getBegin(), sentence.getEnd());
    }

    // Tokens; POS and lemma are attached as features only when set on the token
    for (Token token : select(aJCas, Token.class)) {
      String tokenId = id(TOKEN, token);
      Annotation tokenAnno =
          lifView.newAnnotation(
              tokenId, Discriminators.Uri.TOKEN, token.getBegin(), token.getEnd());

      if (token.getPos() != null) {
        tokenAnno.addFeature(Features.Token.POS, token.getPos().getPosValue());
      }

      if (token.getLemma() != null) {
        tokenAnno.addFeature(Features.Token.LEMMA, token.getLemma().getValue());
      }
    }

    // Named entities
    for (NamedEntity entity : select(aJCas, NamedEntity.class)) {
      String entityId = id(NAMED_ENTITY, entity);
      Annotation entityAnno =
          lifView.newAnnotation(
              entityId, Discriminators.Uri.NE, entity.getBegin(), entity.getEnd());
      entityAnno.setLabel(entity.getValue());
    }

    // Dependencies, grouped into one dependency structure per sentence
    for (Sentence sentence : select(aJCas, Sentence.class)) {
      Set<String> relationIds = new TreeSet<>();

      for (Dependency dependency : selectCovered(Dependency.class, sentence)) {
        String relationId = id(DEPENDENCY, dependency);
        // LAPPS dependencies inherit from Relation which has no offsets
        Annotation relation = lifView.newAnnotation(relationId, Discriminators.Uri.DEPENDENCY);
        relation.setLabel(dependency.getDependencyType());
        relation.addFeature(Features.Dependency.GOVERNOR, id(TOKEN, dependency.getGovernor()));
        relation.addFeature(Features.Dependency.DEPENDENT, id(TOKEN, dependency.getDependent()));
        relationIds.add(relationId);
      }

      // No structure annotation is created for sentences without dependencies.
      if (relationIds.isEmpty()) {
        continue;
      }

      String structureId = id(DEPENDENCY_STRUCTURE, sentence);
      Annotation structure =
          lifView.newAnnotation(
              structureId,
              Discriminators.Uri.DEPENDENCY_STRUCTURE,
              sentence.getBegin(),
              sentence.getEnd());
      structure.addFeature(Features.DependencyStructure.DEPENDENCIES, relationIds);
    }

    // Constituents, one phrase structure per tree root
    for (ROOT root : select(aJCas, ROOT.class)) {
      // Insertion-ordered, so the IDs keep the order in which the tree was traversed.
      Set<String> nodeIds = new LinkedHashSet<>();
      convertConstituent(lifView, root, nodeIds);

      String phraseStructureId = id(PHRASE_STRUCTURE, root);
      Annotation phraseStructure =
          lifView.newAnnotation(
              phraseStructureId,
              Discriminators.Uri.PHRASE_STRUCTURE,
              root.getBegin(),
              root.getEnd());
      phraseStructure.addFeature(Features.PhraseStructure.CONSTITUENTS, nodeIds);
    }

    // Serialize inside the try block so that any failure is reported as a processing exception.
    try (OutputStream target = getOutputStream(aJCas, filenameSuffix)) {
      String serialized = Serializer.toPrettyJson(lif);
      IOUtils.write(serialized, target, encoding);
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }
  }