/**
 * Populates the given CAS from the current {@code nextLine}.
 *
 * <p>The line is split on tab characters and must yield exactly three fields
 * (pmid, title, text); this is enforced through {@code checkEquals}. Each field
 * is passed through {@code unescapeCsv}, and double quotes are additionally
 * stripped from the pmid. A {@link Header} carrying the pmid and title is added
 * to the CAS indexes, the document text is set to the title followed by the
 * text, and the document language is set to {@code "en"}.
 *
 * @param jCas the CAS to fill
 * @throws IOException declared by the method signature
 * @throws CollectionException declared by the method signature
 */
public void getNext(JCas jCas) throws IOException, CollectionException {
    final String[] fields = nextLine.split("\t");
    checkEquals(3, fields.length, "pmid" + fields[0]);

    final String docId = unescapeCsv(fields[0]).replaceAll("\"", "");
    final String docTitle = unescapeCsv(fields[1]);
    final String body = unescapeCsv(fields[2]);

    Header header = new Header(jCas);
    header.setDocId(docId);
    header.setTitle(docTitle);
    header.addToIndexes();

    // Join title and body with ". " unless the title already ends with a period.
    final String separator;
    if (docTitle.endsWith(".")) {
        separator = " ";
    } else {
        separator = ". ";
    }
    jCas.setDocumentText(docTitle + separator + body);
    jCas.setDocumentLanguage("en");
}
@BeforeClass public static void before() throws Exception { conn = getTestConn("MongoTest"); // write one test document into Mongo JCas jCas = UimaTests.getTokenizedTestCas(); BiolexiconDictTerm b = new BiolexiconDictTerm(jCas, 0, 11); b.setEntityId("theId"); b.addToIndexes(); Header h = new Header(jCas); h.setDocId("17"); h.addToIndexes(); JcasPipelineBuilder p = new JcasPipelineBuilder(jCas); p.add(MongoWriter.class, BlueUima.PARAM_DB_CONNECTION, conn); p.process(); }
@Test public void testRead() throws Exception { // read List<JCas> l = asList(createReader(MongoCollectionReader.class, BlueUima.PARAM_DB_CONNECTION, conn)); assertEquals(1, l.size()); JCas jCas = l.get(0); assertEquals(UimaTests.TEST_SENTENCE, jCas.getDocumentText()); for (Annotation a : JCasUtil.select(jCas, Annotation.class)) { System.out.println(a); } BiolexiconDictTerm b = selectSingle(jCas, BiolexiconDictTerm.class); assertNotNull(b); assertEquals("theId", b.getEntityId()); Header h = selectSingle(jCas, Header.class); assertNotNull(h); assertEquals("17", h.getDocId()); }
/**
 * Runs {@code FromFilelistReader} with {@code BlueUima.PARAM_FORMAT} set to
 * {@code true} on a file list containing a single PDF path, and checks that
 * exactly one CAS is produced whose header has int doc id 1 and whose source
 * equals the listed path.
 *
 * @throws Exception if the temporary file list cannot be written or the
 *         reader fails
 */
@Test
public void testFormat() throws Exception {
    final String pdfPath = BLUE_UTILS_TEST_BASE + "testData/1.pdf";

    File tmp = File.createTempFile("FromFilelistReaderTest2", null);
    // Do not leave the temporary file list behind after the test run.
    tmp.deleteOnExit();

    TextFileWriter writer = new TextFileWriter(tmp);
    try {
        writer.addLine(pdfPath);
    } finally {
        // Release the writer even if addLine fails.
        writer.close();
    }

    List<JCas> jCases = asList(
            createReader(
                    FromFilelistReader.class,
                    PARAM_INPUT_FILE, tmp.getAbsolutePath(),
                    BlueUima.PARAM_FORMAT, true));
    assertEquals(1, jCases.size());

    JCas jCas = jCases.get(0);
    assertEquals(1, getHeaderIntDocId(jCas));

    Header header = selectSingle(jCas, Header.class);
    assertEquals(pdfPath, header.getSource());
}