Example #1
0
 /**
  * Returns the last "/"-separated segment of the URI carried by the first
  * {@link SourceDocumentInformation} annotation found in the given CAS.
  *
  * <p>When that annotation has a {@code null} URI, the placeholder text {@code "(no uri)"} is
  * used in its place and is therefore the value returned. A URI that ends with "/" yields an
  * empty string.
  *
  * @param jcas the CAS whose annotation index is searched
  * @return the text following the last "/" of the URI, or {@code null} when the CAS holds no
  *     {@link SourceDocumentInformation} annotation
  */
 public static String getTermSuiteCasFileName(JCas jcas) {
   FSIterator<Annotation> sdiIterator =
       jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
   if (!sdiIterator.hasNext()) {
     return null;
   }

   SourceDocumentInformation sdi = (SourceDocumentInformation) sdiIterator.next();
   String uri = sdi.getUri();
   if (uri == null) {
     uri = "(no uri)";
   }

   // Keep everything after the final "/"; with no "/" present the whole string is kept.
   return uri.substring(uri.lastIndexOf('/') + 1);
 }
Example #2
0
 /**
  * Creates a {@link SourceDocumentInformation} annotation for the given CAS and adds it to the
  * indexes.
  *
  * <p>The annotation spans offsets {@code 0} to {@code text.length()}, records {@code uri} as
  * its URI and has an offset-in-source of {@code 0}.
  *
  * @param jCas the CAS in which the annotation is created
  * @param language not read by this method
  * @param text the document text; only its length is used, so it must not be {@code null}
  * @param uri the URI stored on the annotation
  */
 public static void initJCasSDI(JCas jCas, String language, String text, String uri) {
   SourceDocumentInformation docInfo = new SourceDocumentInformation(jCas);

   // Document identity.
   docInfo.setUri(uri);
   docInfo.setOffsetInSource(0);

   // Span covering the whole text.
   docInfo.setBegin(0);
   docInfo.setEnd(text.length());

   docInfo.addToIndexes();
 }
  /**
   * Populates the given CAS from {@code currentDoc} and indexes a
   * {@link SourceDocumentInformation} annotation describing that document.
   *
   * <p>The running total {@code cumulatedLength} is increased by the document's text length and
   * a progress line is logged before the CAS is touched. The CAS then receives the language code
   * of {@code mLanguage} and the document text. The annotation spans the whole text and records
   * the URI, the document size, the cumulated size and the current document index. Corpus size
   * and document count are set to {@code -1} and the last-segment flag to {@code false}, since
   * they cannot be known when streaming. {@code mCurrentIndex} is incremented once the
   * annotation has been added to the indexes.
   *
   * @param cas the CAS to fill
   * @throws IOException as declared by the overridden method
   * @throws CollectionException wrapping any {@link CASException} raised while obtaining the
   *     JCas view or building the annotation
   */
  @Override
  public void getNext(CAS cas) throws IOException, CollectionException {
    final String docText = this.currentDoc.getText();
    final int docLength = docText.length();
    this.cumulatedLength += docLength;

    final String docUri = this.currentDoc.getUri();
    logger.info(
        "[Stream {}] Processing document {}: {} (total length processed: {})",
        this.streamName,
        this.mCurrentIndex,
        docUri,
        this.cumulatedLength);

    try {
      final SourceDocumentInformation docInfo = new SourceDocumentInformation(cas.getJCas());

      // CAS-level content.
      cas.setDocumentLanguage(mLanguage.getCode());
      cas.setDocumentText(docText);

      // Identity and position of this document within the stream.
      docInfo.setUri(docUri);
      docInfo.setDocumentIndex(mCurrentIndex);
      docInfo.setDocumentSize(docLength);
      docInfo.setCumulatedDocumentSize(this.cumulatedLength);

      // The annotation covers the entire document text.
      docInfo.setBegin(0);
      docInfo.setEnd(docLength);
      docInfo.setOffsetInSource(0);

      // Totals cannot be known in case of streaming.
      docInfo.setCorpusSize(-1);
      docInfo.setNbDocuments(-1);

      // Cannot know if this is the last segment either.
      docInfo.setLastSegment(false);

      docInfo.addToIndexes();
      this.mCurrentIndex++;
    } catch (CASException e) {
      throw new CollectionException(e);
    }
  }