/**
 * Creates a {@link SourceDocumentInformation} annotation on the given JCas and
 * adds it to the CAS indexes.
 *
 * <p>The annotation spans offsets {@code 0} to {@code text.length()}, has an
 * offset-in-source of {@code 0} and carries the supplied URI.
 *
 * @param jCas     the JCas in which the annotation is created
 * @param language not used by this method
 * @param text     the document text; only its length is read, as the end offset
 * @param uri      the URI stored on the annotation
 * @throws NullPointerException if {@code text} is {@code null}
 */
public static void initJCasSDI(JCas jCas, String language, String text, String uri) {
    final SourceDocumentInformation docInfo = new SourceDocumentInformation(jCas);

    docInfo.setUri(uri);
    docInfo.setOffsetInSource(0);

    // The annotation covers the whole text.
    docInfo.setBegin(0);
    docInfo.setEnd(text.length());

    docInfo.addToIndexes();
}
@Override public void getNext(CAS cas) throws IOException, CollectionException { this.cumulatedLength += currentDoc.getText().length(); logger.info( "[Stream {}] Processing document {}: {} (total length processed: {})", this.streamName, this.mCurrentIndex, this.currentDoc.getUri(), this.cumulatedLength); SourceDocumentInformation sdi; try { sdi = new SourceDocumentInformation(cas.getJCas()); sdi.setUri(currentDoc.getUri()); cas.setDocumentLanguage(mLanguage.getCode()); cas.setDocumentText(currentDoc.getText()); sdi.setDocumentSize(currentDoc.getText().length()); sdi.setCumulatedDocumentSize(this.cumulatedLength); sdi.setBegin(0); sdi.setEnd(currentDoc.getText().length()); sdi.setOffsetInSource(0); sdi.setDocumentIndex(mCurrentIndex); /* * Cannot be known in case of streaming */ sdi.setCorpusSize(-1); sdi.setNbDocuments(-1); // Cannot know if this is the last sdi.setLastSegment(false); sdi.addToIndexes(); this.mCurrentIndex++; } catch (CASException e) { throw new CollectionException(e); } }