/**
 * Fills the given CAS from the current input line ({@code nextLine}).
 *
 * <p>The line is split on tab characters and its field count is checked against 3 via
 * {@code checkEquals}. The three fields are CSV-unescaped and used, in order, as document id,
 * title and text. A {@link Header} carrying the id and the title is added to the CAS indexes;
 * the document text is the title followed by the text, and the language is set to "en".
 *
 * @param jCas the CAS to populate
 * @throws IOException declared by the reader contract
 * @throws CollectionException declared by the reader contract
 */
public void getNext(JCas jCas) throws IOException, CollectionException {

    final String[] fields = nextLine.split("\t");
    checkEquals(3, fields.length, "pmid" + fields[0]);

    // Double quotes are removed from the id only; title and text keep theirs.
    final String docId = unescapeCsv(fields[0]).replace("\"", "");
    final String docTitle = unescapeCsv(fields[1]);
    final String body = unescapeCsv(fields[2]);

    final Header docHeader = new Header(jCas);
    docHeader.setDocId(docId);
    docHeader.setTitle(docTitle);
    docHeader.addToIndexes();

    // Join title and body so that exactly one ". " separates them when the
    // title does not already end with a period.
    final String separator;
    if (docTitle.endsWith(".")) {
      separator = " ";
    } else {
      separator = ". ";
    }
    jCas.setDocumentText(docTitle + separator + body);

    jCas.setDocumentLanguage("en");
  }
Exemple #2
0
  /**
   * One-time fixture: obtains the "MongoTest" connection and runs a single test document
   * through {@code MongoWriter}.
   *
   * <p>The document is the tokenized test CAS, extended with a {@code BiolexiconDictTerm}
   * spanning offsets 0-11 (entity id "theId") and a {@code Header} with document id "17".
   *
   * @throws Exception if obtaining the connection, building the CAS or running the pipeline fails
   */
  @BeforeClass
  public static void before() throws Exception {
    conn = getTestConn("MongoTest");

    // Build the single document that the pipeline below hands to MongoWriter.
    final JCas testCas = UimaTests.getTokenizedTestCas();

    final BiolexiconDictTerm term = new BiolexiconDictTerm(testCas, 0, 11);
    term.setEntityId("theId");
    term.addToIndexes();

    final Header docHeader = new Header(testCas);
    docHeader.setDocId("17");
    docHeader.addToIndexes();

    final JcasPipelineBuilder pipeline = new JcasPipelineBuilder(testCas);
    pipeline.add(MongoWriter.class, BlueUima.PARAM_DB_CONNECTION, conn);
    pipeline.process();
  }
Exemple #3
0
  /**
   * Reads everything {@code MongoCollectionReader} yields for the test connection and checks
   * that exactly one document comes back, with the expected text, a single
   * {@code BiolexiconDictTerm} with entity id "theId", and a single {@code Header} with
   * document id "17".
   *
   * @throws Exception if the reader cannot be created or reading fails
   */
  @Test
  public void testRead() throws Exception {

    // Drain the reader into a list of CASes.
    final List<JCas> docs =
        asList(createReader(MongoCollectionReader.class, BlueUima.PARAM_DB_CONNECTION, conn));
    assertEquals(1, docs.size());

    final JCas doc = docs.get(0);
    assertEquals(UimaTests.TEST_SENTENCE, doc.getDocumentText());

    // Print every annotation to stdout to ease debugging of failures.
    for (Annotation annotation : JCasUtil.select(doc, Annotation.class)) {
      System.out.println(annotation);
    }

    // The dictionary term must be present with its entity id.
    final BiolexiconDictTerm term = selectSingle(doc, BiolexiconDictTerm.class);
    assertNotNull(term);
    assertEquals("theId", term.getEntityId());

    // The header must be present with its document id.
    final Header docHeader = selectSingle(doc, Header.class);
    assertNotNull(docHeader);
    assertEquals("17", docHeader.getDocId());
  }
  /**
   * Writes a one-line file list pointing at {@code testData/1.pdf}, feeds it to
   * {@code FromFilelistReader} with {@code BlueUima.PARAM_FORMAT} set to {@code true}, and
   * checks that exactly one CAS is produced whose header has integer document id 1 and whose
   * source equals the listed path.
   *
   * @throws Exception if the temporary file cannot be written or reading fails
   */
  @Test
  public void testFormat() throws Exception {

    final String pdfPath = BLUE_UTILS_TEST_BASE + "testData/1.pdf";

    final File tmp = File.createTempFile("FromFilelistReaderTest2", null);
    // Do not leave the temporary file list behind once the test JVM terminates.
    tmp.deleteOnExit();

    final TextFileWriter writer = new TextFileWriter(tmp);
    try {
      writer.addLine(pdfPath);
    } finally {
      // Release the file handle even when writing the line fails.
      writer.close();
    }

    final List<JCas> jCases =
        asList(
            createReader(
                FromFilelistReader.class,
                PARAM_INPUT_FILE,
                tmp.getAbsolutePath(),
                BlueUima.PARAM_FORMAT,
                true));
    assertEquals(1, jCases.size());

    final JCas jCas = jCases.get(0);
    assertEquals(1, getHeaderIntDocId(jCas));

    final Header header = selectSingle(jCas, Header.class);
    assertEquals(pdfPath, header.getSource());
  }