Java CAS.setDocumentTextの例

プログラミング言語: Java

名前空間/パッケージ名: org.apache.uima.cas

クラス/型: CAS

メソッド/関数: setDocumentText

hotexamples.comのコード掲載数: 7

Java CAS.setDocumentText - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたJavaのorg.apache.uima.cas.CAS.setDocumentTextの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

getJCas(18)

getTypeSystem(9)

setDocumentText(7)

getDocumentText(6)

getAnnotationIndex(5)

release(3)

removeFsFromIndexes(2)

setDocumentLanguage(2)

getIndexRepository(2)

createAnnotation(2)

getView(2)

reset(2)

addFsToIndexes(1)

getViewIterator(1)

createView(1)

createFilteredIterator(1)

setSofaDataURI(1)

コード例 #1

ファイルを表示

ファイル: Pdf2CasConverter.java プロジェクト: renaud/dkpro-core

  /**
   * Called when a PDF document has been fully processed: stores the string form of the
   * {@code text} field as the document text of {@code cas} and, if trace logging is enabled,
   * logs a closing document marker.
   *
   * @param aPdf the PDF document that was processed (not read by this method)
   * @throws IOException declared by the overridden signature
   */
  @Override
  protected void endDocument(final PDDocument aPdf) throws IOException {
    final String extractedText = text.toString();
    cas.setDocumentText(extractedText);

    // Nothing more to do unless trace output was requested.
    if (!log.isTraceEnabled()) {
      return;
    }
    log.trace("</document>");
  }

コード例 #2

ファイルを表示

ファイル: SingleFileCollectionReader.java プロジェクト: mefozzy/NLP-Cloud

  /**
   * Populates the given CAS with the text and language extracted from {@code file} by a
   * {@code TikaProcessor}.
   *
   * <p>The document is rejected (via {@code ExceptionHandler.logAndThrow}) when the extracted
   * text is null or empty, or when the detected language is missing or does not contain
   * {@code "ru"}.
   *
   * @param aCAS the CAS to fill with document text and document language
   * @throws IOException declared by the reader contract
   * @throws CollectionException declared by the reader contract
   */
  @Override
  public void getNext(final CAS aCAS) throws IOException, CollectionException {
    // NOTE(review): this default instance is only used if logAndRethrow below does not actually
    // rethrow; kept as-is because ExceptionHandler's behaviour is not visible here.
    TikaProcessor processor = new TikaProcessor();
    try {
      processor = TikaProcessor.newInstance(file);
    } catch (Exception e) {
      ExceptionHandler.logAndRethrow(logger, "TikaProcessor: ", e);
    }

    final String documentText = processor.getText();
    if (documentText == null || documentText.isEmpty()) {
      ExceptionHandler.logAndThrow(logger, "Document text is null or empty");
    }
    aCAS.setDocumentText(documentText);

    final String textLanguage = processor.getLanguage();
    // Guard against a missing language as well: without the null check an undetected language
    // would surface as a bare NullPointerException instead of the intended rejection.
    if (textLanguage == null || !textLanguage.contains("ru")) {
      ExceptionHandler.logAndThrow(logger, "Document language is not russian");
    }
    aCAS.setDocumentLanguage(textLanguage);
  }

コード例 #3

ファイルを表示

ファイル: AsClient_.java プロジェクト: webscience/K-people

  /**
   * Creates and initializes the UIMA-AS client, reads one XML file from a fixed local path,
   * places its content into a CAS obtained from the client, and sends that CAS to the service.
   *
   * @throws Exception if any step (initialization, file reading, CAS retrieval or sending) fails
   */
  public void run() throws Exception {
    // Create the asynchronous client API object and initialize it.
    // Status callback registration is currently disabled:
    //   uimaAsEngine.addStatusCallbackListener(new StatusCallbackListener());
    uimaAsEngine = new BaseUIMAAsynchronousEngine_impl();
    initializeUimaAsEngine(uimaAsEngine);

    // Load the input document from the hard-coded location.
    final String inputPath =
        "C:\\WebScience\\Progetti\\K-People\\OntologyController_UIMA\\apache-uima\\examples\\src\\it\\webscience\\uima\\event-2031.xml";
    final String payload = readFile(inputPath);

    // Take an empty CAS from the client's pool and fill it with the input data.
    final CAS requestCas = uimaAsEngine.getCAS();
    requestCas.setDocumentText(payload);

    // Hand the CAS to the service for processing.
    uimaAsEngine.sendCAS(requestCas);
  }

コード例 #4

ファイルを表示

ファイル: StreamingCollectionReader.java プロジェクト: termsuite/termsuite-core

  /**
   * Loads the current streamed document into the given CAS.
   *
   * <p>Adds the document length to the running total, logs progress, sets the CAS language and
   * text, and indexes a {@code SourceDocumentInformation} annotation spanning the whole text.
   * The document index is incremented after the annotation has been added to the indexes.
   *
   * @param cas the CAS to populate
   * @throws IOException declared by the reader contract
   * @throws CollectionException if the JCas view cannot be obtained from {@code cas}
   */
  @Override
  public void getNext(CAS cas) throws IOException, CollectionException {
    // assumes currentDoc.getText() is a plain getter, so reading it once is equivalent
    final String docText = currentDoc.getText();
    final int docLength = docText.length();

    this.cumulatedLength += docLength;
    logger.info(
        "[Stream {}] Processing document {}: {} (total length processed: {})",
        this.streamName,
        this.mCurrentIndex,
        this.currentDoc.getUri(),
        this.cumulatedLength);

    try {
      final SourceDocumentInformation sdi = new SourceDocumentInformation(cas.getJCas());
      sdi.setUri(currentDoc.getUri());

      cas.setDocumentLanguage(mLanguage.getCode());
      cas.setDocumentText(docText);

      // The annotation covers the entire document.
      sdi.setDocumentSize(docLength);
      sdi.setCumulatedDocumentSize(this.cumulatedLength);
      sdi.setBegin(0);
      sdi.setEnd(docLength);
      sdi.setOffsetInSource(0);
      sdi.setDocumentIndex(mCurrentIndex);

      // Corpus-wide figures cannot be known in case of streaming.
      sdi.setCorpusSize(-1);
      sdi.setNbDocuments(-1);

      // Likewise, we cannot know whether this is the last segment.
      sdi.setLastSegment(false);

      sdi.addToIndexes();
      this.mCurrentIndex++;
    } catch (CASException e) {
      throw new CollectionException(e);
    }
  }

コード例 #5

ファイルを表示

ファイル: VectorSpaceRetrieval.java プロジェクト: soumya-batra/hw4-soumyab

  /**
   * Runs the vector-space retrieval analysis engine over {@code /data/documents.txt}.
   *
   * <p>The engine is built from the classpath descriptor
   * {@code /descriptors/retrievalsystem/VectorSpaceRetrieval.xml}. Each line of the documents
   * file is processed as a separate document in a single, reused CAS. After the last line the
   * engine is told that collection processing is complete, and the elapsed time in seconds is
   * printed.
   *
   * @param args command-line arguments (unused)
   * @throws IllegalArgumentException if the descriptor or the documents file is not on the
   *     classpath
   * @throws Exception if descriptor parsing, engine creation, reading, or processing fails
   */
  public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    URL descUrl =
        VectorSpaceRetrieval.class.getResource(
            "/descriptors/retrievalsystem/VectorSpaceRetrieval.xml");
    if (descUrl == null) {
      throw new IllegalArgumentException("Error opening VectorSpaceRetrieval.xml");
    }
    // create AnalysisEngine
    XMLInputSource input = new XMLInputSource(descUrl);
    AnalysisEngineDescription desc =
        UIMAFramework.getXMLParser().parseAnalysisEngineDescription(input);
    AnalysisEngine anAnalysisEngine = UIMAFramework.produceAnalysisEngine(desc);
    try {
      CAS aCas = anAnalysisEngine.newCAS();

      URL docUrl = VectorSpaceRetrieval.class.getResource("/data/documents.txt");
      if (docUrl == null) {
        throw new IllegalArgumentException("Error opening data/documents.txt");
      }
      // try-with-resources closes the reader even when reading or processing fails.
      // NOTE(review): the reader uses the platform default charset; confirm the data file's
      // encoding before pinning an explicit one.
      try (BufferedReader br = new BufferedReader(new InputStreamReader(docUrl.openStream()))) {
        String sLine;
        while ((sLine = br.readLine()) != null) {
          aCas.setDocumentText(sLine);
          anAnalysisEngine.process(aCas);
          // Clear the CAS so it can be reused for the next line.
          aCas.reset();
        }
      }
      anAnalysisEngine.collectionProcessComplete();
    } finally {
      // Always destroy the engine, including when processing aborts with an exception.
      anAnalysisEngine.destroy();
    }
    long endTime = System.currentTimeMillis();

    double totalTime = (endTime - startTime) / 1000.0;
    System.out.println("Total time taken: " + totalTime);
  }

コード例 #6

ファイルを表示

ファイル: HtmlConverterXmlTest.java プロジェクト: renaud/ruta-core

  /**
   * Runs the HtmlAnnotator followed by the HtmlConverter on a small XML snippet, with
   * {@code child1}, {@code child2} and {@code child3} configured as gap-inducing tags and
   * {@code "$"} as the gap text. Verifies the text of the converted view and the covered text
   * of the four TAG annotations found in it.
   */
  @Test
  public void test() throws Exception {
    final String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Locate both descriptors, falling back to the fully qualified resource path.
    final ClassLoader loader = HtmlAnnotator.class.getClassLoader();
    final URL annotatorAtRoot = loader.getResource("HtmlAnnotator.xml");
    final URL urlA =
        annotatorAtRoot != null
            ? annotatorAtRoot
            : loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    final URL converterAtRoot = loader.getResource("HtmlConverter.xml");
    final URL urlC =
        converterAtRoot != null
            ? converterAtRoot
            : loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");

    // Annotator engine.
    final ResourceSpecifier annotatorSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(urlA));
    final AnalysisEngine aeA = UIMAFramework.produceAnalysisEngine(annotatorSpec);
    aeA.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    aeA.reconfigure();

    // Converter engine with gap handling enabled for the three child tags.
    final ResourceSpecifier converterSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(urlC));
    final AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(converterSpec);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    aeC.setConfigParameterValue(
        HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
    aeC.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
    aeC.reconfigure();

    final CAS cas = aeA.newCAS();
    final Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

    cas.setDocumentText(html);
    aeA.process(cas);
    aeC.process(cas);

    final CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("$Some content$$More content.", plainTextCas.getDocumentText());

    final AnnotationIndex<AnnotationFS> ai = plainTextCas.getAnnotationIndex(tagType);
    final FSIterator<AnnotationFS> iterator = ai.iterator();
    assertEquals(4, ai.size());

    // Expected covered text of the TAG annotations, in index order.
    final String[] expectedCoveredTexts = {
      "$Some content$$More content.", "$Some content", "$", "$More content."
    };
    for (String expectedCoveredText : expectedCoveredTexts) {
      assertEquals(expectedCoveredText, iterator.next().getCoveredText());
    }

    cas.release();
  }

コード例 #7

ファイルを表示

ファイル: HtmlConverterXmlTest.java プロジェクト: renaud/ruta-core

  /**
   * Runs the HtmlAnnotator followed by the HtmlConverter (with {@code PARAM_EXPAND_OFFSETS}
   * enabled) on a small XML snippet. Verifies the text of the converted view, the covered text
   * of the four TAG annotations, and that the {@code expandedOffsets} feature is false on the
   * first two and true on at least one of the last two, which share the same covered text.
   */
  @Test
  public void testExpandOffsets() throws Exception {
    final String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Locate both descriptors, falling back to the fully qualified resource path.
    final ClassLoader loader = HtmlAnnotator.class.getClassLoader();
    final URL annotatorAtRoot = loader.getResource("HtmlAnnotator.xml");
    final URL urlA =
        annotatorAtRoot != null
            ? annotatorAtRoot
            : loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    final URL converterAtRoot = loader.getResource("HtmlConverter.xml");
    final URL urlC =
        converterAtRoot != null
            ? converterAtRoot
            : loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");

    // Annotator engine.
    final ResourceSpecifier annotatorSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(urlA));
    final AnalysisEngine aeA = UIMAFramework.produceAnalysisEngine(annotatorSpec);
    aeA.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    aeA.reconfigure();

    // Converter engine with offset expansion switched on.
    final ResourceSpecifier converterSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(urlC));
    final AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(converterSpec);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    aeC.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true);
    aeC.reconfigure();

    final CAS cas = aeA.newCAS();
    final Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");
    final Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets");

    cas.setDocumentText(html);
    aeA.process(cas);
    aeC.process(cas);

    final CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("Some contentMore content.", plainTextCas.getDocumentText());

    final AnnotationIndex<AnnotationFS> ai = plainTextCas.getAnnotationIndex(tagType);
    final FSIterator<AnnotationFS> iterator = ai.iterator();
    assertEquals(4, ai.size());

    // The first two annotations must not be flagged as expanded.
    for (String expectedText : new String[] {"Some contentMore content.", "Some content"}) {
      final AnnotationFS tag = iterator.next();
      assertEquals(false, tag.getBooleanValue(expandedFeature));
      assertEquals(expectedText, tag.getCoveredText());
    }

    // The last two annotations share the same offsets.
    final AnnotationFS third = iterator.next();
    final boolean thirdExpanded = third.getBooleanValue(expandedFeature);
    assertEquals("More content.", third.getCoveredText());

    final AnnotationFS fourth = iterator.next();
    final boolean fourthExpanded = fourth.getBooleanValue(expandedFeature);
    assertEquals("More content.", fourth.getCoveredText());

    // for one of these two annotations (with same offsets) the feature must be set to true
    assertEquals(true, thirdExpanded || fourthExpanded);

    cas.release();
  }