Ejemplos de CAS.setDocumentText en Java

Lenguaje de programación: Java

Namespace/Package Name: org.apache.uima.cas

Clase / Tipo: CAS

Método / Función: setDocumentText

Ejemplos en hotexamples.com: 7

Java CAS.setDocumentText - 7 ejemplos encontrados. Estos son los ejemplos en Java del mundo real mejor valorados de org.apache.uima.cas.CAS.setDocumentText extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

getJCas(18)

getTypeSystem(9)

setDocumentText(7)

getDocumentText(6)

getAnnotationIndex(5)

release(3)

removeFsFromIndexes(2)

setDocumentLanguage(2)

getIndexRepository(2)

createAnnotation(2)

getView(2)

reset(2)

addFsToIndexes(1)

getViewIterator(1)

createView(1)

createFilteredIterator(1)

setSofaDataURI(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: Pdf2CasConverter.java Proyecto: renaud/dkpro-core

  /**
   * Called when processing of a PDF document ends: stores the string form of the {@code text}
   * field as the document text of {@code cas}, then writes a closing marker at trace level.
   *
   * @param aPdf the PDF document that was processed; not read by this method
   * @throws IOException part of the declared signature
   */
  @Override
  protected void endDocument(final PDDocument aPdf) throws IOException {
    final String extractedText = text.toString();
    cas.setDocumentText(extractedText);

    // Nothing more to do unless trace logging is switched on.
    if (!log.isTraceEnabled()) {
      return;
    }
    log.trace("</document>");
  }

Ejemplo n.º 2

Mostrar archivo

Archivo: SingleFileCollectionReader.java Proyecto: mefozzy/NLP-Cloud

  /**
   * Populates the given CAS with the text and language that a {@code TikaProcessor} reports for
   * {@code file}.
   *
   * <p>The document text must be non-null and non-empty, and the reported language must contain
   * {@code "ru"}; otherwise the failure is handed to {@code ExceptionHandler.logAndThrow}.
   * NOTE(review): this code relies on {@code logAndThrow}/{@code logAndRethrow} actually raising
   * an exception (their implementation is not visible here) - confirm.
   *
   * @param aCAS the CAS that receives the document text and the document language
   * @throws IOException part of the declared signature
   * @throws CollectionException part of the declared signature
   */
  @Override
  public void getNext(final CAS aCAS) throws IOException, CollectionException {
    // The initial instance only keeps the variable definitely assigned; it is replaced by the
    // file-backed processor unless newInstance fails.
    TikaProcessor processor = new TikaProcessor();
    try {
      processor = TikaProcessor.newInstance(file);
    } catch (Exception e) {
      ExceptionHandler.logAndRethrow(logger, "TikaProcessor: ", e);
    }

    String documentText = processor.getText();
    if (documentText == null || documentText.isEmpty()) {
      ExceptionHandler.logAndThrow(logger, "Document text is null or empty");
    }
    aCAS.setDocumentText(documentText);

    String textLanguage = processor.getLanguage();
    // A missing language is reported through the same path as a non-matching one instead of
    // surfacing as a NullPointerException on contains().
    if (textLanguage == null || !textLanguage.contains("ru")) {
      ExceptionHandler.logAndThrow(logger, "Document language is not russian");
    }
    aCAS.setDocumentLanguage(textLanguage);
  }

Ejemplo n.º 3

Mostrar archivo

Archivo: AsClient_.java Proyecto: webscience/K-people

  /**
   * Creates and initializes the asynchronous UIMA engine, loads one XML file from a fixed local
   * path, and sends its content to the service as the document text of a CAS.
   *
   * <p>No status callback listener is registered by this method.
   *
   * @throws Exception anything raised by engine initialization, file reading, or CAS submission
   */
  public void run() throws Exception {
    // Set up the asynchronous client before any CAS is requested from it.
    uimaAsEngine = new BaseUIMAAsynchronousEngine_impl();
    initializeUimaAsEngine(uimaAsEngine);

    // Input document: the whole file content becomes the CAS document text.
    final String inputXml =
        readFile(
            "C:\\WebScience\\Progetti\\K-People\\OntologyController_UIMA\\apache-uima\\examples\\src\\it\\webscience\\uima\\event-2031.xml");

    // Obtain a CAS from the engine, load the input into it, and hand it to the service.
    final CAS requestCas = uimaAsEngine.getCAS();
    requestCas.setDocumentText(inputXml);
    uimaAsEngine.sendCAS(requestCas);
  }

Ejemplo n.º 4

Mostrar archivo

Archivo: StreamingCollectionReader.java Proyecto: termsuite/termsuite-core

  /**
   * Loads the current streamed document into the given CAS and indexes a
   * {@code SourceDocumentInformation} annotation describing it.
   *
   * <p>The document text is read once, so the CAS text, the recorded document size, the
   * annotation end offset and the cumulated length are all derived from the same string.
   * Corpus size and document count are recorded as {@code -1}, and the segment is never flagged
   * as last, because neither is known while streaming.
   *
   * @param cas the CAS that receives the document language, text and source information
   * @throws IOException part of the declared signature
   * @throws CollectionException wrapping a {@code CASException} raised while accessing the JCas
   */
  @Override
  public void getNext(CAS cas) throws IOException, CollectionException {
    // Single snapshot of the text instead of four separate getText() calls.
    final String docText = currentDoc.getText();
    final int docLength = docText.length();

    this.cumulatedLength += docLength;
    logger.info(
        "[Stream {}] Processing document {}: {} (total length processed: {})",
        this.streamName,
        this.mCurrentIndex,
        this.currentDoc.getUri(),
        this.cumulatedLength);

    try {
      final SourceDocumentInformation sdi = new SourceDocumentInformation(cas.getJCas());
      sdi.setUri(currentDoc.getUri());
      cas.setDocumentLanguage(mLanguage.getCode());
      cas.setDocumentText(docText);
      sdi.setDocumentSize(docLength);
      sdi.setCumulatedDocumentSize(this.cumulatedLength);
      // The annotation spans the whole document text.
      sdi.setBegin(0);
      sdi.setEnd(docLength);
      sdi.setOffsetInSource(0);
      sdi.setDocumentIndex(mCurrentIndex);

      // Cannot be known in case of streaming.
      sdi.setCorpusSize(-1);
      sdi.setNbDocuments(-1);

      // Cannot know if this is the last segment.
      sdi.setLastSegment(false);

      sdi.addToIndexes();
      // The index only advances once the document has been fully registered.
      this.mCurrentIndex++;
    } catch (CASException e) {
      throw new CollectionException(e);
    }
  }

Ejemplo n.º 5

Mostrar archivo

Archivo: VectorSpaceRetrieval.java Proyecto: soumya-batra/hw4-soumyab

  /**
   * Runs the VectorSpaceRetrieval analysis engine over {@code /data/documents.txt}, feeding each
   * line of that resource to the engine as a separate document, and prints the elapsed time in
   * seconds.
   *
   * <p>The reader is closed and the engine destroyed even when processing fails part-way.
   *
   * @param args command-line arguments; not used
   * @throws IllegalArgumentException if the descriptor or the documents resource cannot be found
   * @throws Exception anything raised while parsing the descriptor, reading, or processing
   */
  public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    URL descUrl =
        VectorSpaceRetrieval.class.getResource(
            "/descriptors/retrievalsystem/VectorSpaceRetrieval.xml");
    if (descUrl == null) {
      throw new IllegalArgumentException("Error opening VectorSpaceRetrieval.xml");
    }
    // create AnalysisEngine
    XMLInputSource input = new XMLInputSource(descUrl);
    AnalysisEngineDescription desc =
        UIMAFramework.getXMLParser().parseAnalysisEngineDescription(input);
    AnalysisEngine anAnalysisEngine = UIMAFramework.produceAnalysisEngine(desc);
    try {
      CAS aCas = anAnalysisEngine.newCAS();

      URL docUrl = VectorSpaceRetrieval.class.getResource("/data/documents.txt");
      if (docUrl == null) {
        throw new IllegalArgumentException("Error opening data/documents.txt");
      }
      // NOTE(review): the reader uses the platform default charset, as before; consider an
      // explicit charset once the encoding of documents.txt is confirmed.
      try (BufferedReader br = new BufferedReader(new InputStreamReader(docUrl.openStream()))) {
        String sLine;
        while ((sLine = br.readLine()) != null) {
          // One input line is one document; the CAS is reset so it can be reused for the next.
          aCas.setDocumentText(sLine);
          anAnalysisEngine.process(aCas);
          aCas.reset();
        }
      }
      anAnalysisEngine.collectionProcessComplete();
    } finally {
      // Release the engine whether or not processing succeeded.
      anAnalysisEngine.destroy();
    }
    long endTime = System.currentTimeMillis();

    double totalTime = (endTime - startTime) / 1000.0;
    System.out.println("Total time taken: " + totalTime);
  }

Ejemplo n.º 6

Mostrar archivo

Archivo: HtmlConverterXmlTest.java Proyecto: renaud/ruta-core

  /**
   * Runs the HtmlAnnotator and then the HtmlConverter over a small XML snippet, with
   * {@code child1}, {@code child2} and {@code child3} configured as gap-inducing tags and
   * {@code "$"} as gap text, and checks the converted view: its document text and the covered
   * text of the four TAG annotations, in index order.
   *
   * @throws Exception if descriptor loading, engine creation or processing fails
   */
  @Test
  public void test() throws Exception {
    final String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Each descriptor is looked up by bare name first, then under the engine package path.
    final ClassLoader loader = HtmlAnnotator.class.getClassLoader();

    URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
    if (annotatorUrl == null) {
      annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }

    URL converterUrl = loader.getResource("HtmlConverter.xml");
    if (converterUrl == null) {
      converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    // Annotator engine.
    final ResourceSpecifier annotatorSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
    final AnalysisEngine annotatorEngine = UIMAFramework.produceAnalysisEngine(annotatorSpec);
    annotatorEngine.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    annotatorEngine.reconfigure();

    // Converter engine.
    final ResourceSpecifier converterSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
    final AnalysisEngine converterEngine = UIMAFramework.produceAnalysisEngine(converterSpec);
    converterEngine.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    converterEngine.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    converterEngine.setConfigParameterValue(
        HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
    converterEngine.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
    converterEngine.reconfigure();

    final CAS cas = annotatorEngine.newCAS();
    final Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

    cas.setDocumentText(html);
    annotatorEngine.process(cas);
    converterEngine.process(cas);

    final CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("$Some content$$More content.", plainTextCas.getDocumentText());

    final AnnotationIndex<AnnotationFS> tagIndex = plainTextCas.getAnnotationIndex(tagType);
    final FSIterator<AnnotationFS> tags = tagIndex.iterator();
    assertEquals(4, tagIndex.size());

    // Expected covered text of each TAG annotation, in iteration order.
    final String[] expectedCoveredTexts = {
      "$Some content$$More content.", "$Some content", "$", "$More content."
    };
    for (final String expected : expectedCoveredTexts) {
      assertEquals(expected, tags.next().getCoveredText());
    }

    cas.release();
  }

Ejemplo n.º 7

Mostrar archivo

Archivo: HtmlConverterXmlTest.java Proyecto: renaud/ruta-core

  /**
   * Runs the HtmlAnnotator and then the HtmlConverter (with {@code PARAM_EXPAND_OFFSETS}
   * enabled) over a small XML snippet and checks the converted view: its document text, the
   * covered text of the four TAG annotations, and their {@code expandedOffsets} feature. The
   * first two annotations must have the feature unset; of the last two, which cover the same
   * text, at least one must have it set.
   *
   * @throws Exception if descriptor loading, engine creation or processing fails
   */
  @Test
  public void testExpandOffsets() throws Exception {
    final String html =
        "<Parent>\n"
            + "<Child1>Some content</Child1>\n"
            + "<Child2 attribute=“someValue” />\n"
            + "<Child3>More content.</Child3>\n"
            + "</Parent>\n";

    // Each descriptor is looked up by bare name first, then under the engine package path.
    final ClassLoader loader = HtmlAnnotator.class.getClassLoader();

    URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
    if (annotatorUrl == null) {
      annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
    }

    URL converterUrl = loader.getResource("HtmlConverter.xml");
    if (converterUrl == null) {
      converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
    }

    // Annotator engine.
    final ResourceSpecifier annotatorSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
    final AnalysisEngine annotatorEngine = UIMAFramework.produceAnalysisEngine(annotatorSpec);
    annotatorEngine.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
    annotatorEngine.reconfigure();

    // Converter engine.
    final ResourceSpecifier converterSpec =
        UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
    final AnalysisEngine converterEngine = UIMAFramework.produceAnalysisEngine(converterSpec);
    converterEngine.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
    converterEngine.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
    converterEngine.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true);
    converterEngine.reconfigure();

    final CAS cas = annotatorEngine.newCAS();
    final Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");
    final Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets");

    cas.setDocumentText(html);
    annotatorEngine.process(cas);
    converterEngine.process(cas);

    final CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
    assertEquals("Some contentMore content.", plainTextCas.getDocumentText());

    final AnnotationIndex<AnnotationFS> tagIndex = plainTextCas.getAnnotationIndex(tagType);
    final FSIterator<AnnotationFS> tags = tagIndex.iterator();
    assertEquals(4, tagIndex.size());

    final AnnotationFS first = tags.next();
    assertEquals(false, first.getBooleanValue(expandedFeature));
    assertEquals("Some contentMore content.", first.getCoveredText());

    final AnnotationFS second = tags.next();
    assertEquals(false, second.getBooleanValue(expandedFeature));
    assertEquals("Some content", second.getCoveredText());

    final AnnotationFS third = tags.next();
    final boolean thirdExpanded = third.getBooleanValue(expandedFeature);
    assertEquals("More content.", third.getCoveredText());

    final AnnotationFS fourth = tags.next();
    final boolean fourthExpanded = fourth.getBooleanValue(expandedFeature);
    assertEquals("More content.", fourth.getCoveredText());

    // For one of these two annotations (with the same offsets) the feature must be set to true.
    assertEquals(true, thirdExpanded || fourthExpanded);

    cas.release();
  }