/**
 * Sets up this reader from its configuration parameters.
 *
 * <p>Reads the queue name, the language name and the stream name from the
 * configuration, resolves the language through {@code Lang.forName}, looks up
 * the document queue registered under the configured queue name, and resets
 * the document index and the cumulated text length to zero.
 *
 * @throws ResourceInitializationException as declared by the overridden method
 */
@Override
public void initialize() throws ResourceInitializationException {
    // Name under which the document queue is registered.
    queueName = (String) getConfigParameterValue(PARAM_QUEUE_NAME);

    // Language applied to every document produced by this reader.
    final String languageName = (String) getConfigParameterValue(PARAM_LANGUAGE);
    mLanguage = Lang.forName(languageName);

    // Label used to identify this stream in log output.
    streamName = (String) getConfigParameterValue(PARAM_NAME);

    // Documents are pulled from the queue registered under queueName.
    documentQueue = QueueRegistry.getInstance().getQueue(queueName);

    // Start counting from scratch.
    mCurrentIndex = 0;
    cumulatedLength = 0;
}
@Override public void getNext(CAS cas) throws IOException, CollectionException { this.cumulatedLength += currentDoc.getText().length(); logger.info( "[Stream {}] Processing document {}: {} (total length processed: {})", this.streamName, this.mCurrentIndex, this.currentDoc.getUri(), this.cumulatedLength); SourceDocumentInformation sdi; try { sdi = new SourceDocumentInformation(cas.getJCas()); sdi.setUri(currentDoc.getUri()); cas.setDocumentLanguage(mLanguage.getCode()); cas.setDocumentText(currentDoc.getText()); sdi.setDocumentSize(currentDoc.getText().length()); sdi.setCumulatedDocumentSize(this.cumulatedLength); sdi.setBegin(0); sdi.setEnd(currentDoc.getText().length()); sdi.setOffsetInSource(0); sdi.setDocumentIndex(mCurrentIndex); /* * Cannot be known in case of streaming */ sdi.setCorpusSize(-1); sdi.setNbDocuments(-1); // Cannot know if this is the last sdi.setLastSegment(false); sdi.addToIndexes(); this.mCurrentIndex++; } catch (CASException e) { throw new CollectionException(e); } }