protected Sentence createSentence(final JCas aJCas, final int aBegin, final int aEnd) { int[] span = new int[] {aBegin, aEnd}; trim(aJCas.getDocumentText(), span); if (!isEmpty(span[0], span[1]) && isWriteSentence()) { Sentence seg = new Sentence(aJCas, span[0], span[1]); seg.addToIndexes(aJCas); // sentenceCount++; tokenCount = 0; return seg; } else { return null; } }
public void updatePanel(AjaxRequestTarget aTarget, CurationContainer aCC) throws UIMAException, ClassNotFoundException, IOException, BratAnnotationException { JCas jCas = repository.readCurationCas(bModel.getDocument()); final int sentenceAddress = getAddr( selectSentenceAt(jCas, bModel.getSentenceBeginOffset(), bModel.getSentenceEndOffset())); bModel.setSentenceAddress(sentenceAddress); final Sentence sentence = selectByAddr(jCas, Sentence.class, sentenceAddress); List<Sentence> followingSentences = selectFollowing(jCas, Sentence.class, sentence, bModel.getPreferences().getWindowSize()); // Check also, when getting the last sentence address in the display window, if this is the // last sentence or the ONLY sentence in the document Sentence lastSentenceAddressInDisplayWindow = followingSentences.size() == 0 ? sentence : followingSentences.get(followingSentences.size() - 1); if (curationView == null) { curationView = new SourceListView(); } curationView.setCurationBegin(sentence.getBegin()); curationView.setCurationEnd(lastSentenceAddressInDisplayWindow.getEnd()); int ws = bModel.getPreferences().getWindowSize(); Sentence fs = BratAjaxCasUtil.selectSentenceAt( jCas, bModel.getSentenceBeginOffset(), bModel.getSentenceEndOffset()); int l = BratAjaxCasUtil.getLastSentenceAddressInDisplayWindow(jCas, getAddr(fs), ws); Sentence ls = (Sentence) selectByAddr(jCas, FeatureStructure.class, l); fSn = BratAjaxCasUtil.getSentenceNumber(jCas, fs.getBegin()); lSn = BratAjaxCasUtil.getSentenceNumber(jCas, ls.getBegin()); sentencesListView.addOrReplace(sentenceList); aTarget.add(sentencesListView); /* * corssSentAnnoView.addOrReplace(crossSentAnnoList); aTarget.add(corssSentAnnoView); */ aTarget.add(suggestionViewPanel); if (annotate) { annotator.bratRender(aTarget, editor.getCas(bModel)); annotator.bratSetHighlight(aTarget, bModel.getSelection().getAnnotation()); } else { annotator.bratRenderLater(aTarget); } annotate = false; CuratorUtil.updatePanel( aTarget, suggestionViewPanel, 
aCC, annotator, repository, annotationSelectionByUsernameAndAddress, curationView, annotationService, userRepository); }
/**
 * Rebuilds {@code crossSentenceLists} and stretches segment ends over annotations that cross
 * them.
 *
 * <p>For every begin offset in {@code segmentBeginEnd}, every type in {@code entryTypes} and
 * every CAS in {@code jCases}, this method
 *
 * <ul>
 *   <li>collects the sentence numbers reached by annotations that begin or end inside the
 *       segment but have their other end outside of it, and
 *   <li>replaces the segment's end in {@code segmentBeginEnd} with the end of the sentence
 *       returned by {@code getSentenceByAnnoEnd} when an annotation starts at or before the
 *       segment end and finishes after it.
 * </ul>
 *
 * <p>The collected numbers are stored in {@code crossSentenceLists} under the segment's own
 * sentence number.
 *
 * @param segmentBeginEnd segment begin offset mapped to segment end offset; values are updated
 *     in place
 * @param jCases the CASes to inspect
 * @param entryTypes the annotation types to inspect
 */
private void updateCrossSentAnnoList(
        Map<Integer, Integer> segmentBeginEnd, Map<String, JCas> jCases, List<Type> entryTypes) {
    crossSentenceLists = new HashMap<>();
    // NOTE: the loop variable "begin" shadows "this.begin". Only existing keys are re-put
    // inside the loop, so the key set is not changed structurally.
    for (Integer begin : segmentBeginEnd.keySet()) {
        int thisSent = -1;
        Set<Integer> crossSents = new HashSet<>();
        for (Type t : entryTypes) {
            for (JCas c : jCases.values()) {
                // The segment's sentence number is taken from the first CAS visited only.
                if (thisSent == -1) {
                    thisSent = BratAjaxCasUtil.getSentenceNumber(c, begin);
                }
                // update cross-sentence annotation lists
                // This query uses this.begin, not the loop variable. "end" is not declared in
                // this method - presumably an instance field; verify against the class.
                for (AnnotationFS fs : selectCovered(c.getCas(), t, this.begin, end)) {
                    // CASE 1. annotation begins here
                    if (fs.getBegin() >= begin && fs.getBegin() <= segmentBeginEnd.get(begin)) {
                        if (fs.getEnd() > segmentBeginEnd.get(begin) || fs.getEnd() < begin) {
                            Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd());
                            int thatSent = BratAjaxCasUtil.getSentenceNumber(c, s.getBegin());
                            crossSents.add(thatSent);
                        }
                    }
                    // CASE 2. Annotation ends here
                    else if (fs.getEnd() >= begin && fs.getEnd() <= segmentBeginEnd.get(begin)) {
                        if (fs.getBegin() > segmentBeginEnd.get(begin) || fs.getBegin() < begin) {
                            int thatSent = BratAjaxCasUtil.getSentenceNumber(c, fs.getBegin());
                            crossSents.add(thatSent);
                        }
                    }
                }
                // Unlike the query above, this one starts at the loop variable "begin".
                for (AnnotationFS fs : selectCovered(c.getCas(), t, begin, end)) {
                    if (fs.getBegin() <= segmentBeginEnd.get(begin)
                            && fs.getEnd() > segmentBeginEnd.get(begin)) {
                        Sentence s = BratAjaxCasUtil.getSentenceByAnnoEnd(c, fs.getEnd());
                        // Later iterations of the type/CAS loops see the updated segment end.
                        segmentBeginEnd.put(begin, s.getEnd());
                    }
                }
            }
        }
        crossSentenceLists.put(thisSent, crossSents);
    }
}
private void setCurationSegmentBeginEnd() throws UIMAException, ClassNotFoundException, IOException { JCas jCas = repository.readAnnotationCas(bModel.getDocument(), bModel.getUser()); final int sentenceAddress = getAddr( selectSentenceAt(jCas, bModel.getSentenceBeginOffset(), bModel.getSentenceEndOffset())); final Sentence sentence = selectByAddr(jCas, Sentence.class, sentenceAddress); List<Sentence> followingSentences = selectFollowing(jCas, Sentence.class, sentence, bModel.getPreferences().getWindowSize()); // Check also, when getting the last sentence address in the display window, if this is the // last sentence or the ONLY sentence in the document Sentence lastSentenceAddressInDisplayWindow = followingSentences.size() == 0 ? sentence : followingSentences.get(followingSentences.size() - 1); curationSegment.setBegin(sentence.getBegin()); curationSegment.setEnd(lastSentenceAddressInDisplayWindow.getEnd()); }
@Override public void getNext(CAS aCAS) throws IOException, CollectionException { super.getNext(aCAS); JCas jcas; try { jcas = aCAS.getJCas(); // consider a tweet to be a sentence Sentence sentenceAnno = new Sentence(jcas); sentenceAnno.setBegin(0); sentenceAnno.setEnd(jcas.getDocumentText().length()); sentenceAnno.addToIndexes(); } catch (CASException e) { throw new CollectionException(); } TextClassificationOutcome outcome = new TextClassificationOutcome(jcas); outcome.setOutcome(getTextClassificationOutcome(jcas)); outcome.addToIndexes(); }
private void ubdateSentenceNumber(JCas aJCas, int aAddress) { bModel.setSentenceAddress(aAddress); Sentence sentence = selectByAddr(aJCas, Sentence.class, aAddress); bModel.setSentenceBeginOffset(sentence.getBegin()); bModel.setSentenceEndOffset(sentence.getEnd()); bModel.setSentenceNumber(BratAjaxCasUtil.getSentenceNumber(aJCas, sentence.getBegin())); Sentence firstSentence = selectSentenceAt(aJCas, bModel.getSentenceBeginOffset(), bModel.getSentenceEndOffset()); int lastAddressInPage = getLastSentenceAddressInDisplayWindow( aJCas, getAddr(firstSentence), bModel.getPreferences().getWindowSize()); // the last sentence address in the display window Sentence lastSentenceInPage = (Sentence) selectByAddr(aJCas, FeatureStructure.class, lastAddressInPage); bModel.setFSN(BratAjaxCasUtil.getSentenceNumber(aJCas, firstSentence.getBegin())); bModel.setLSN(BratAjaxCasUtil.getSentenceNumber(aJCas, lastSentenceInPage.getBegin())); }
/**
 * Records begin/end offsets, running sentence numbers and CAS addresses for every sentence in
 * the display window of the given CAS.
 *
 * <p>The window runs from the sentence selected through the model's sentence begin/end offsets
 * to the sentence returned by {@code getLastSentenceAddressInDisplayWindow}. As a side effect
 * the fields {@code begin}, {@code end} and {@code sentenceNumber} are overwritten.
 *
 * @param aBratAnnotatorModel model supplying the offsets of the first sentence
 * @param segmentBeginEnd receives sentence begin mapped to sentence end
 * @param segmentNumber receives sentence begin mapped to sentence number
 * @param segmentAdress receives, under {@code username}, sentence begin mapped to the address
 *     of the sentence; an existing entry for {@code username} is replaced
 * @param jCas the CAS whose sentences are inspected
 * @param username key under which the addresses are stored
 * @param aWinSize number of sentences in the display window
 */
private void updateSegment(
        BratAnnotatorModel aBratAnnotatorModel,
        Map<Integer, Integer> segmentBeginEnd,
        Map<Integer, Integer> segmentNumber,
        Map<String, Map<Integer, Integer>> segmentAdress,
        JCas jCas,
        String username,
        int aWinSize)
        throws UIMAException, ClassNotFoundException, IOException {
    Sentence windowFirst =
            selectSentenceAt(
                    jCas,
                    aBratAnnotatorModel.getSentenceBeginOffset(),
                    aBratAnnotatorModel.getSentenceEndOffset());
    int windowLastAddress =
            getLastSentenceAddressInDisplayWindow(jCas, getAddr(windowFirst), aWinSize);
    Sentence windowLast = selectByAddr(jCas, Sentence.class, windowLastAddress);

    begin = windowFirst.getBegin();
    end = windowLast.getEnd();
    sentenceNumber = getFirstSentenceNumber(jCas, getAddr(windowFirst));

    Map<Integer, Integer> addressByBegin = new HashMap<Integer, Integer>();
    segmentAdress.put(username, addressByBegin);

    for (Sentence current : selectCovered(jCas, Sentence.class, begin, end)) {
        sentenceNumber += 1;
        int currentBegin = current.getBegin();
        segmentBeginEnd.put(currentBegin, current.getEnd());
        segmentNumber.put(currentBegin, sentenceNumber);
        addressByBegin.put(currentBegin, getAddr(current));
    }
}
/**
 * Checks a sentence annotation against the expected text and offsets and prints a confirmation
 * line on success.
 *
 * @param sentence the sentence annotation to check
 * @param info the expected covered text, begin and end
 * @throws LAPVerificationException if the covered text, begin or end differs from {@code info}
 */
private void verifySentence(Sentence sentence, TestSentenceInfo info)
        throws LAPVerificationException {
    final String actualText = sentence.getCoveredText();
    if (!info.text.equals(actualText)) {
        throw new LAPVerificationException(
                "Bad sentence text, expected \"" + info.text + "\", got \"" + actualText + "\"");
    }

    final int actualBegin = sentence.getBegin();
    if (info.begin != actualBegin) {
        throw new LAPVerificationException(
                "Bad sentence begin index, expected " + info.begin + ", got " + actualBegin);
    }

    final int actualEnd = sentence.getEnd();
    if (info.end != actualEnd) {
        throw new LAPVerificationException(
                "Bad sentence end index, expected " + info.end + ", got " + actualEnd);
    }

    System.out.println("Verified sentence: " + info);
}
private void updateCurationView( final CurationContainer curationContainer, final SourceListView curationViewItem, AjaxRequestTarget aTarget, JCas jCas) { int currentSentAddress = BratAjaxCasUtil.getCurrentSentence( jCas, curationViewItem.getBegin(), curationViewItem.getEnd()) .getAddress(); bModel.setSentenceAddress( BratAjaxCasUtil.getSentenceBeginAddress( jCas, currentSentAddress, curationViewItem.getBegin(), bModel.getProject(), bModel.getDocument(), bModel.getPreferences().getWindowSize())); Sentence sentence = selectByAddr(jCas, Sentence.class, bModel.getSentenceAddress()); bModel.setSentenceBeginOffset(sentence.getBegin()); bModel.setSentenceEndOffset(sentence.getEnd()); Sentence firstSentence = selectSentenceAt(jCas, bModel.getSentenceBeginOffset(), bModel.getSentenceEndOffset()); int lastAddressInPage = getLastSentenceAddressInDisplayWindow( jCas, getAddr(firstSentence), bModel.getPreferences().getWindowSize()); // the last sentence address in the display window Sentence lastSentenceInPage = (Sentence) selectByAddr(jCas, FeatureStructure.class, lastAddressInPage); bModel.setFSN(BratAjaxCasUtil.getSentenceNumber(jCas, firstSentence.getBegin())); bModel.setLSN(BratAjaxCasUtil.getSentenceNumber(jCas, lastSentenceInPage.getBegin())); curationContainer.setBratAnnotatorModel(bModel); onChange(aTarget); }
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { // Convert UIMA to LIF Container Container container = new Container(); container.setLanguage(aJCas.getDocumentLanguage()); container.setText(aJCas.getDocumentText()); View view = container.newView(); // Paragraph for (Paragraph p : select(aJCas, Paragraph.class)) { view.newAnnotation(id(PARAGRAPH, p), Discriminators.Uri.PARAGRAPH, p.getBegin(), p.getEnd()); } // Sentence for (Sentence s : select(aJCas, Sentence.class)) { view.newAnnotation(id(SENTENCE, s), Discriminators.Uri.SENTENCE, s.getBegin(), s.getEnd()); } // Token, POS, Lemma for (Token t : select(aJCas, Token.class)) { Annotation a = view.newAnnotation(id(TOKEN, t), Discriminators.Uri.TOKEN, t.getBegin(), t.getEnd()); if (t.getPos() != null) { a.addFeature(Features.Token.POS, t.getPos().getPosValue()); } if (t.getLemma() != null) { a.addFeature(Features.Token.LEMMA, t.getLemma().getValue()); } } // NamedEntity for (NamedEntity neAnno : select(aJCas, NamedEntity.class)) { Annotation ne = view.newAnnotation( id(NAMED_ENTITY, neAnno), Discriminators.Uri.NE, neAnno.getBegin(), neAnno.getEnd()); ne.setLabel(neAnno.getValue()); } // Dependency for (Sentence s : select(aJCas, Sentence.class)) { Set<String> depRelIds = new TreeSet<>(); for (Dependency dep : selectCovered(Dependency.class, s)) { String depRelId = id(DEPENDENCY, dep); // LAPPS dependencies inherit from Relation which has no offsets Annotation depRel = view.newAnnotation(depRelId, Discriminators.Uri.DEPENDENCY); depRel.setLabel(dep.getDependencyType()); depRel.addFeature(Features.Dependency.GOVERNOR, id(TOKEN, dep.getGovernor())); depRel.addFeature(Features.Dependency.DEPENDENT, id(TOKEN, dep.getDependent())); depRelIds.add(depRelId); } if (!depRelIds.isEmpty()) { Annotation depStruct = view.newAnnotation( id(DEPENDENCY_STRUCTURE, s), Discriminators.Uri.DEPENDENCY_STRUCTURE, s.getBegin(), s.getEnd()); depStruct.addFeature(Features.DependencyStructure.DEPENDENCIES, 
depRelIds); } } // Constituents for (ROOT r : select(aJCas, ROOT.class)) { Set<String> constituents = new LinkedHashSet<>(); convertConstituent(view, r, constituents); Annotation phraseStruct = view.newAnnotation( id(PHRASE_STRUCTURE, r), Discriminators.Uri.PHRASE_STRUCTURE, r.getBegin(), r.getEnd()); phraseStruct.addFeature(Features.PhraseStructure.CONSTITUENTS, constituents); } try (OutputStream docOS = getOutputStream(aJCas, filenameSuffix)) { String json = Serializer.toPrettyJson(container); IOUtils.write(json, docOS, encoding); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } }
public CurationContainer buildCurationContainer(BratAnnotatorModel aBModel) throws UIMAException, ClassNotFoundException, IOException, BratAnnotationException { CurationContainer curationContainer = new CurationContainer(); // initialize Variables SourceDocument sourceDocument = aBModel.getDocument(); Map<Integer, Integer> segmentBeginEnd = new HashMap<Integer, Integer>(); Map<Integer, Integer> segmentNumber = new HashMap<Integer, Integer>(); Map<String, Map<Integer, Integer>> segmentAdress = new HashMap<String, Map<Integer, Integer>>(); // get annotation documents List<AnnotationDocument> finishedAnnotationDocuments = new ArrayList<AnnotationDocument>(); for (AnnotationDocument annotationDocument : repository.listAnnotationDocuments(aBModel.getDocument())) { if (annotationDocument.getState().equals(AnnotationDocumentState.FINISHED)) { finishedAnnotationDocuments.add(annotationDocument); } } Map<String, JCas> jCases = new HashMap<String, JCas>(); AnnotationDocument randomAnnotationDocument = null; JCas mergeJCas; // get the correction/automation JCas for the logged in user if (aBModel.getMode().equals(Mode.AUTOMATION) || aBModel.getMode().equals(Mode.CORRECTION)) { jCases = listJcasesforCorrection(randomAnnotationDocument, sourceDocument, aBModel.getMode()); mergeJCas = getMergeCas(aBModel, sourceDocument, jCases, randomAnnotationDocument); String username = jCases.keySet().iterator().next(); updateSegment( aBModel, segmentBeginEnd, segmentNumber, segmentAdress, jCases.get(username), username, aBModel.getPreferences().getWindowSize()); } else { jCases = listJcasesforCuration( finishedAnnotationDocuments, randomAnnotationDocument, aBModel.getMode()); mergeJCas = getMergeCas(aBModel, sourceDocument, jCases, randomAnnotationDocument); updateSegment( aBModel, segmentBeginEnd, segmentNumber, segmentAdress, mergeJCas, CurationPanel.CURATION_USER, aBModel.getPreferences().getCurationWindowSize()); } List<Type> entryTypes = null; 
segmentAdress.put(CurationPanel.CURATION_USER, new HashMap<Integer, Integer>()); for (Sentence sentence : selectCovered(mergeJCas, Sentence.class, begin, end)) { segmentAdress.get(CurationPanel.CURATION_USER).put(sentence.getBegin(), getAddr(sentence)); } if (entryTypes == null) { entryTypes = getEntryTypes(mergeJCas, aBModel.getAnnotationLayers(), annotationService); } // for cross-sentences annotation, update the end of the segment if (firstload) { updateCrossSentAnnoList(segmentBeginEnd, jCases, entryTypes); firstload = false; } for (Integer begin : segmentBeginEnd.keySet()) { Integer end = segmentBeginEnd.get(begin); DiffResult diff = CasDiff2.doDiffSingle( annotationService, aBModel.getProject(), entryTypes, jCases, begin, end); SourceListView curationSegment = new SourceListView(); curationSegment.setBegin(begin); curationSegment.setEnd(end); if (diff.hasDifferences() || !diff.getIncompleteConfigurationSets().isEmpty()) { curationSegment.setSentenceState(SentenceState.DISAGREE); } else { curationSegment.setSentenceState(SentenceState.AGREE); } curationSegment.setSentenceNumber(segmentNumber.get(begin)); for (String username : segmentAdress.keySet()) { curationSegment.getSentenceAddress().put(username, segmentAdress.get(username).get(begin)); } curationContainer.getCurationViewByBegin().put(begin, curationSegment); } return curationContainer; }
public void convert(JCas aJCas, BufferedReader aReader) throws IOException { if (readPos) { try { posMappingProvider.configure(aJCas.getCas()); } catch (AnalysisEngineProcessException e) { throw new IOException(e); } } JCasBuilder doc = new JCasBuilder(aJCas); List<String[]> words; while ((words = readSentence(aReader)) != null) { if (words.isEmpty()) { // Ignore empty sentences. This can happen when there are multiple end-of-sentence // markers following each other. continue; } int sentenceBegin = doc.getPosition(); int sentenceEnd = sentenceBegin; // Tokens, Lemma, POS Map<Integer, Token> tokens = new HashMap<Integer, Token>(); List<SemanticPredicate> preds = new ArrayList<>(); for (String[] word : words) { // Read token Token token = doc.add(word[FORM], Token.class); tokens.put(Integer.valueOf(word[ID]), token); doc.add(" "); // Read lemma if (!UNUSED.equals(word[LEMMA]) && readLemma) { Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd()); lemma.setValue(word[LEMMA]); lemma.addToIndexes(); token.setLemma(lemma); } // Read part-of-speech tag if (!UNUSED.equals(word[POS]) && readPos) { Type posTag = posMappingProvider.getTagType(word[POS]); POS pos = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd()); pos.setPosValue(word[POS]); pos.addToIndexes(); token.setPos(pos); } // Read morphological features if (!UNUSED.equals(word[FEAT]) && readMorph) { MorphologicalFeatures morphtag = new MorphologicalFeatures(aJCas, token.getBegin(), token.getEnd()); morphtag.setValue(word[FEAT]); morphtag.addToIndexes(); } if (!UNUSED.equals(word[PRED]) && readSemanticPredicate) { SemanticPredicate pred = new SemanticPredicate(aJCas, token.getBegin(), token.getEnd()); pred.setCategory(word[PRED]); pred.addToIndexes(); preds.add(pred); } sentenceEnd = token.getEnd(); } // Dependencies if (readDependency) { for (String[] word : words) { if (!UNUSED.equals(word[DEPREL])) { int depId = Integer.valueOf(word[ID]); int govId = 
Integer.valueOf(word[HEAD]); // Model the root as a loop onto itself if (govId == 0) { Dependency rel = new ROOT(aJCas); rel.setGovernor(tokens.get(depId)); rel.setDependent(tokens.get(depId)); rel.setDependencyType(word[DEPREL]); rel.setBegin(rel.getDependent().getBegin()); rel.setEnd(rel.getDependent().getEnd()); rel.addToIndexes(); } else { Dependency rel = new Dependency(aJCas); rel.setGovernor(tokens.get(govId)); rel.setDependent(tokens.get(depId)); rel.setDependencyType(word[DEPREL]); rel.setBegin(rel.getDependent().getBegin()); rel.setEnd(rel.getDependent().getEnd()); rel.addToIndexes(); } } } } // Semantic arguments if (readSemanticPredicate) { // Get arguments for one predicate at a time for (int p = 0; p < preds.size(); p++) { List<SemanticArgument> args = new ArrayList<SemanticArgument>(); for (String[] word : words) { if (!UNUSED.equals(word[APRED + p])) { Token token = tokens.get(Integer.valueOf(word[ID])); SemanticArgument arg = new SemanticArgument(aJCas, token.getBegin(), token.getEnd()); arg.setRole(word[APRED + p]); arg.addToIndexes(); args.add(arg); } } SemanticPredicate pred = preds.get(p); pred.setArguments(FSCollectionFactory.createFSArray(aJCas, args)); } } // Sentence Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd); sentence.addToIndexes(); // Once sentence per line. doc.add("\n"); } doc.close(); }
public void convert(JCas aJCas, BufferedReader aReader) throws IOException { try { if (readPos) { posMappingProvider.configure(aJCas.getCas()); } if (readConstituent) { constituentMappingProvider.configure(aJCas.getCas()); } } catch (AnalysisEngineProcessException e) { throw new IOException(e); } Map<String, CoreferenceLink> chains = new HashMap<>(); JCasBuilder doc = new JCasBuilder(aJCas); List<String[]> words; while ((words = readSentence(aJCas, aReader)) != null) { if (words.isEmpty()) { // Ignore empty sentences. This can happen when there are multiple end-of-sentence // markers following each other. continue; } int sentenceBegin = doc.getPosition(); int sentenceEnd = sentenceBegin; StringBuilder parse = new StringBuilder(); // Tokens, Lemma, POS Map<Integer, Token> tokenById = new HashMap<Integer, Token>(); List<SemPred> preds = new ArrayList<>(); for (String[] word : words) { // Read token Token token = doc.add(word[FORM], Token.class); tokenById.put(Integer.valueOf(word[ID]), token); doc.add(" "); // Read lemma if (!UNUSED.equals(word[LEMMA]) && readLemma) { Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd()); lemma.setValue(word[LEMMA]); lemma.addToIndexes(); token.setLemma(lemma); } // Read part-of-speech tag if (!UNUSED.equals(word[POS]) && readPos) { Type posTag = posMappingProvider.getTagType(word[POS]); POS pos = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd()); pos.setPosValue(word[POS]); pos.addToIndexes(); token.setPos(pos); } if (!UNUSED.equals(word[PRED]) && readSemanticPredicate) { SemPred pred = new SemPred(aJCas, token.getBegin(), token.getEnd()); pred.setCategory(word[PRED]); pred.addToIndexes(); preds.add(pred); } if (!UNUSED.equals(word[PARSE]) && readConstituent) { String fixed = word[PARSE].replace("*", "(" + word[POS] + " " + word[FORM] + ")"); parse.append(fixed); } if (!UNUSED.equals(word[WORD_SENSE]) && readWordSense) { WordSense wordSense = new WordSense(aJCas, token.getBegin(), 
token.getEnd()); wordSense.setValue(word[WORD_SENSE]); wordSense.addToIndexes(); } if (!UNUSED.equals(word[word.length - 1]) && readCoreference) { String[] chainFragments = word[word.length - 1].split("\\|"); for (String chainFragment : chainFragments) { boolean beginning = chainFragment.startsWith("("); boolean ending = chainFragment.endsWith(")"); String chainId = chainFragment.substring( beginning ? 1 : 0, ending ? chainFragment.length() - 1 : chainFragment.length()); CoreferenceLink link = chains.get(chainId); if (beginning) { if (link == null) { link = new CoreferenceLink(aJCas); CoreferenceChain chain = new CoreferenceChain(aJCas); chain.setFirst(link); chain.addToIndexes(); } else { CoreferenceLink newLink = new CoreferenceLink(aJCas); link.setNext(newLink); link = newLink; } link.setReferenceType(chainId); link.setBegin(token.getBegin()); } if (ending) { link.setEnd(token.getEnd()); link.addToIndexes(); } chains.put(chainId, link); } } sentenceEnd = token.getEnd(); } // Named entities if (readNamedEntity) { int currentNeBegin = -1; String currentNeType = null; for (int i = 0; i < words.size(); i++) { String ne = words.get(i)[NAMED_ENTITIES]; boolean beginning = ne.startsWith("("); boolean ending = ne.endsWith(")"); // When a NE is beginning, we remember what the NE is and where it began if (beginning) { // The NE is beginning with "(" and either ending with "(" or "*", so we trim // the first and last character currentNeType = ne.substring(1, ne.length() - 1); currentNeBegin = i; } // We need to create an annotation if the current token is the end of an annotation if (ending) { // Determine begin and end of named entity int begin = tokenById.get(currentNeBegin).getBegin(); int end = tokenById.get(i).getEnd(); // Add named entity NamedEntity namedEntity = new NamedEntity(aJCas, begin, end); namedEntity.setValue(currentNeType); namedEntity.addToIndexes(); // Forget remembered named entity currentNeBegin = -1; currentNeType = null; } } } // Semantic arguments 
if (readSemanticPredicate) { // Get arguments for one predicate at a time for (int p = 0; p < preds.size(); p++) { SemPred pred = preds.get(p); List<SemArgLink> args = new ArrayList<>(); int currentArgBegin = -1; String currentArgType = null; for (int i = 0; i < words.size(); i++) { String ne = words.get(i)[APRED + p]; boolean beginning = ne.startsWith("("); boolean ending = ne.endsWith(")"); // When a arg is beginning, we remember what the NE is and where it began if (beginning) { // The arg is beginning with "(" and either ending with "(" or "*", so // we trim the first and last character currentArgType = ne.substring(1, ne.length() - 1); currentArgBegin = i; } // We need to create an annotation if the current token is the end of an // annotation if (ending) { // Determine begin and end of argument int begin = tokenById.get(currentArgBegin).getBegin(); int end = tokenById.get(i).getEnd(); // Add named entity unless it is a (V*) which has the same offsets as // the predicate if (!(pred.getBegin() == begin && pred.getEnd() == end)) { SemArg arg = new SemArg(aJCas, begin, end); arg.addToIndexes(); SemArgLink link = new SemArgLink(aJCas); link.setRole(currentArgType); link.setTarget(arg); args.add(link); } // Forget remembered arg currentArgBegin = -1; currentArgType = null; } } pred.setArguments(FSCollectionFactory.createFSArray(aJCas, args)); } } // Sentence Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd); sentence.addToIndexes(); converter.convertPennTree(sentence, PennTreeUtils.parsePennTree(parse.toString())); // Once sentence per line. doc.add("\n"); } doc.close(); }