private void convertConstituent( View aView, org.apache.uima.jcas.tcas.Annotation aNode, Set<String> aConstituents) { if (aNode instanceof Constituent) { // LAPPS constituents inherit from Relation which has no offsets Annotation constituent = aView.newAnnotation(id(CONSTITUENT, aNode), Discriminators.Uri.CONSTITUENT); aConstituents.add(constituent.getId()); for (org.apache.uima.jcas.tcas.Annotation child : select(((Constituent) aNode).getChildren(), org.apache.uima.jcas.tcas.Annotation.class)) { convertConstituent(aView, child, aConstituents); } } else if (aNode instanceof Token) { aConstituents.add(id(TOKEN, aNode)); } else { throw new IllegalStateException("Unexpected node type: " + aNode); } }
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { // Convert UIMA to LIF Container Container container = new Container(); container.setLanguage(aJCas.getDocumentLanguage()); container.setText(aJCas.getDocumentText()); View view = container.newView(); // Paragraph for (Paragraph p : select(aJCas, Paragraph.class)) { view.newAnnotation(id(PARAGRAPH, p), Discriminators.Uri.PARAGRAPH, p.getBegin(), p.getEnd()); } // Sentence for (Sentence s : select(aJCas, Sentence.class)) { view.newAnnotation(id(SENTENCE, s), Discriminators.Uri.SENTENCE, s.getBegin(), s.getEnd()); } // Token, POS, Lemma for (Token t : select(aJCas, Token.class)) { Annotation a = view.newAnnotation(id(TOKEN, t), Discriminators.Uri.TOKEN, t.getBegin(), t.getEnd()); if (t.getPos() != null) { a.addFeature(Features.Token.POS, t.getPos().getPosValue()); } if (t.getLemma() != null) { a.addFeature(Features.Token.LEMMA, t.getLemma().getValue()); } } // NamedEntity for (NamedEntity neAnno : select(aJCas, NamedEntity.class)) { Annotation ne = view.newAnnotation( id(NAMED_ENTITY, neAnno), Discriminators.Uri.NE, neAnno.getBegin(), neAnno.getEnd()); ne.setLabel(neAnno.getValue()); } // Dependency for (Sentence s : select(aJCas, Sentence.class)) { Set<String> depRelIds = new TreeSet<>(); for (Dependency dep : selectCovered(Dependency.class, s)) { String depRelId = id(DEPENDENCY, dep); // LAPPS dependencies inherit from Relation which has no offsets Annotation depRel = view.newAnnotation(depRelId, Discriminators.Uri.DEPENDENCY); depRel.setLabel(dep.getDependencyType()); depRel.addFeature(Features.Dependency.GOVERNOR, id(TOKEN, dep.getGovernor())); depRel.addFeature(Features.Dependency.DEPENDENT, id(TOKEN, dep.getDependent())); depRelIds.add(depRelId); } if (!depRelIds.isEmpty()) { Annotation depStruct = view.newAnnotation( id(DEPENDENCY_STRUCTURE, s), Discriminators.Uri.DEPENDENCY_STRUCTURE, s.getBegin(), s.getEnd()); depStruct.addFeature(Features.DependencyStructure.DEPENDENCIES, depRelIds); } } // Constituents for (ROOT r : select(aJCas, ROOT.class)) { Set<String> constituents = new LinkedHashSet<>(); convertConstituent(view, r, constituents); Annotation phraseStruct = view.newAnnotation( id(PHRASE_STRUCTURE, r), Discriminators.Uri.PHRASE_STRUCTURE, r.getBegin(), r.getEnd()); phraseStruct.addFeature(Features.PhraseStructure.CONSTITUENTS, constituents); } try (OutputStream docOS = getOutputStream(aJCas, filenameSuffix)) { String json = Serializer.toPrettyJson(container); IOUtils.write(json, docOS, encoding); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } }