/**
 * Runs the {@code LbjPosTagger} over a test document and checks the resulting POS
 * annotations.
 *
 * @param language     language code handed to {@code TestRunner.runTest}
 * @param variant      not used by this helper; the engine is created without parameters
 * @param testDocument text to be processed
 * @param tags         expected values passed as the second argument of
 *                     {@code AssertAnnotations.assertPOS}
 * @param tagClasses   expected values passed as the first argument of
 *                     {@code AssertAnnotations.assertPOS}
 * @throws Exception if engine creation, processing, or the assertion helper fails
 */
private void runTest(
        String language,
        String variant,
        String testDocument,
        String[] tags,
        String[] tagClasses)
    throws Exception
{
    // The tagger is instantiated with its default configuration.
    AnalysisEngine tagger = createEngine(LbjPosTagger.class);

    JCas processed = TestRunner.runTest(tagger, language, testDocument);

    // Note the argument order: tag classes first, then the tags themselves.
    AssertAnnotations.assertPOS(tagClasses, tags, select(processed, POS.class));
}
/**
 * Runs the {@code OpenNlpPosTagger} over a test document, checks the resulting POS
 * annotations, and hands back the processed CAS for further assertions.
 *
 * @param language     language code handed to {@code TestRunner.runTest}
 * @param variant      value for {@code OpenNlpPosTagger.PARAM_VARIANT}; callers in this
 *                     file also pass {@code null}
 * @param testDocument text to be processed
 * @param tags         expected values passed as the second argument of
 *                     {@code AssertAnnotations.assertPOS}
 * @param tagClasses   expected values passed as the first argument of
 *                     {@code AssertAnnotations.assertPOS}
 * @return the {@code JCas} produced by {@code TestRunner.runTest}
 * @throws Exception if engine creation, processing, or the assertion helper fails
 */
private JCas runTest(
        String language,
        String variant,
        String testDocument,
        String[] tags,
        String[] tagClasses)
    throws Exception
{
    // Configure the tagger with the requested variant and enable tagset printing.
    AnalysisEngine tagger = createEngine(
            OpenNlpPosTagger.class,
            OpenNlpPosTagger.PARAM_VARIANT, variant,
            OpenNlpPosTagger.PARAM_PRINT_TAGSET, true);

    JCas processed = TestRunner.runTest(tagger, language, testDocument);

    // Note the argument order: tag classes first, then the tags themselves.
    AssertAnnotations.assertPOS(tagClasses, tags, select(processed, POS.class));

    return processed;
}
@Test public void testEnglish() throws Exception { long maxMemory = Runtime.getRuntime().maxMemory(); Assume.assumeTrue("Insufficient max memory: " + maxMemory, maxMemory > 3700000000l); // Run the test pipeline. Note the full stop at the end of a sentence is preceded by a // whitespace. This is necessary for it to be detected as a separate token! JCas jcas = runTest("en", null, "SAP where John Doe works is in Germany ."); // Define the reference data that we expect to get back from the test String[] namedEntity = { "[ 10, 18]NamedEntity(PERSON) (John Doe)", "[ 31, 38]NamedEntity(GPE) (Germany)" }; // Compare the annotations created in the pipeline to the reference data AssertAnnotations.assertNamedEntity(namedEntity, select(jcas, NamedEntity.class)); }
@Ignore("We don't have these models integrated yet") @Test public void testPortuguese() throws Exception { String[] bosqueTags = new String[] { "?", "adj", "adv", "art", "conj-c", "conj-s", "ec", "in", "n", "num", "pp", "pron-det", "pron-indp", "pron-pers", "prop", "prp", "punc", "v-fin", "v-ger", "v-inf", "v-pcp", "vp" }; JCas jcas = runTest( "pt", null, "Este é um teste .", new String[] {"pron-det", "v-fin", "art", "n", "punc"}, new String[] {"PR", "V", "ART", "NN", "PUNC"}); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest( "pt", "maxent", "Este é um teste .", new String[] {"pron-det", "v-fin", "art", "n", "punc"}, new String[] {"PR", "V", "ART", "NN", "PUNC"}); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest( "pt", "perceptron", "Este é um teste .", new String[] {"pron-det", "v-fin", "art", "n", "punc"}, new String[] {"PR", "V", "ART", "NN", "PUNC"}); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest( "pt", "mm-maxent", "Este é um teste .", new String[] {"PROSUB", "V", "ART", "N", "."}, new String[] {"POS", "POS", "POS", "POS", "POS"}); // AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest( "pt", "mm-perceptron", "Este é um teste .", new String[] {"PROSUB", "V", "ART", "N", "."}, new String[] {"POS", "POS", "POS", "POS", "POS"}); // AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); jcas = runTest( "pt", "cogroo", "Este é um teste .", new String[] {"pron-det", "v-fin", "artm", "nm", "."}, new String[] {"POS", "POS", "POS", "POS", "POS"}); AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas); }