Ejemplo n.º 1
0
  /**
   * Processes a document with an {@code LbjPosTagger} engine and checks the POS annotations
   * found in the resulting CAS.
   *
   * @param language language code forwarded to {@code TestRunner.runTest}
   * @param variant not used by this helper; the engine is created without any parameters
   * @param testDocument document text forwarded to {@code TestRunner.runTest}
   * @param tags expected values passed as the second argument of {@code assertPOS}
   * @param tagClasses expected values passed as the first argument of {@code assertPOS}
   * @throws Exception if engine creation, processing, or the assertion fails
   */
  private void runTest(
      String language, String variant, String testDocument, String[] tags, String[] tagClasses)
      throws Exception {
    AnalysisEngine tagger = createEngine(LbjPosTagger.class);
    JCas processed = TestRunner.runTest(tagger, language, testDocument);

    // Compare every POS annotation in the processed CAS against the expectations.
    AssertAnnotations.assertPOS(tagClasses, tags, select(processed, POS.class));
  }
Ejemplo n.º 2
0
  /**
   * Processes a document with an {@code OpenNlpPosTagger} engine, checks the POS annotations
   * found in the resulting CAS, and hands the CAS back for further assertions.
   *
   * @param language language code forwarded to {@code TestRunner.runTest}
   * @param variant value set for {@code OpenNlpPosTagger.PARAM_VARIANT}
   * @param testDocument document text forwarded to {@code TestRunner.runTest}
   * @param tags expected values passed as the second argument of {@code assertPOS}
   * @param tagClasses expected values passed as the first argument of {@code assertPOS}
   * @return the CAS produced by the test run
   * @throws Exception if engine creation, processing, or the assertion fails
   */
  private JCas runTest(
      String language, String variant, String testDocument, String[] tags, String[] tagClasses)
      throws Exception {
    // Engine configuration is given as alternating parameter-name / value pairs.
    AnalysisEngine tagger =
        createEngine(
            OpenNlpPosTagger.class,
            OpenNlpPosTagger.PARAM_VARIANT, variant,
            OpenNlpPosTagger.PARAM_PRINT_TAGSET, true);

    JCas processed = TestRunner.runTest(tagger, language, testDocument);
    AssertAnnotations.assertPOS(tagClasses, tags, select(processed, POS.class));
    return processed;
  }
  /**
   * Runs the English pipeline on a short sentence and checks the named entities it produces.
   *
   * <p>The test is skipped through {@code Assume} unless the JVM's maximum memory exceeds
   * 3,700,000,000 bytes.
   *
   * @throws Exception if the pipeline run or the assertion fails
   */
  @Test
  public void testEnglish() throws Exception {
    // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit '1'.
    final long requiredMemory = 3700000000L;
    long maxMemory = Runtime.getRuntime().maxMemory();
    Assume.assumeTrue("Insufficient max memory: " + maxMemory, maxMemory > requiredMemory);

    // Run the test pipeline. Note the full stop at the end of a sentence is preceded by a
    // whitespace. This is necessary for it to be detected as a separate token!
    JCas jcas = runTest("en", null, "SAP where John Doe works is in Germany .");

    // Define the reference data that we expect to get back from the test
    String[] namedEntity = {
      "[ 10, 18]NamedEntity(PERSON) (John Doe)", "[ 31, 38]NamedEntity(GPE) (Germany)"
    };

    // Compare the annotations created in the pipeline to the reference data
    AssertAnnotations.assertNamedEntity(namedEntity, select(jcas, NamedEntity.class));
  }
Ejemplo n.º 4
0
  /**
   * Tags one Portuguese sentence with several model variants and checks the POS output of each.
   * For most variants the tagset recorded in the CAS is also compared against the expected
   * "bosque" tag list. Currently disabled via {@code @Ignore}.
   *
   * @throws Exception if a pipeline run or an assertion fails
   */
  @Ignore("We don't have these models integrated yet")
  @Test
  public void testPortuguese() throws Exception {
    final String sentence = "Este é um teste .";
    final String tagsetName = "bosque";
    final String[] expectedTagset = {
      "?", "adj", "adv", "art", "conj-c", "conj-s", "ec", "in", "n", "num", "pp",
      "pron-det", "pron-indp", "pron-pers", "prop", "prp", "punc", "v-fin", "v-ger",
      "v-inf", "v-pcp", "vp"
    };

    // Variant passed as null.
    JCas defaultResult =
        runTest(
            "pt",
            null,
            sentence,
            new String[] {"pron-det", "v-fin", "art", "n", "punc"},
            new String[] {"PR", "V", "ART", "NN", "PUNC"});
    AssertAnnotations.assertTagset(POS.class, tagsetName, expectedTagset, defaultResult);

    JCas maxentResult =
        runTest(
            "pt",
            "maxent",
            sentence,
            new String[] {"pron-det", "v-fin", "art", "n", "punc"},
            new String[] {"PR", "V", "ART", "NN", "PUNC"});
    AssertAnnotations.assertTagset(POS.class, tagsetName, expectedTagset, maxentResult);

    JCas perceptronResult =
        runTest(
            "pt",
            "perceptron",
            sentence,
            new String[] {"pron-det", "v-fin", "art", "n", "punc"},
            new String[] {"PR", "V", "ART", "NN", "PUNC"});
    AssertAnnotations.assertTagset(POS.class, tagsetName, expectedTagset, perceptronResult);

    // The "mm-*" variants expect different tags, and their tagset check is disabled.
    runTest(
        "pt",
        "mm-maxent",
        sentence,
        new String[] {"PROSUB", "V", "ART", "N", "."},
        new String[] {"POS", "POS", "POS", "POS", "POS"});
    // AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas);

    runTest(
        "pt",
        "mm-perceptron",
        sentence,
        new String[] {"PROSUB", "V", "ART", "N", "."},
        new String[] {"POS", "POS", "POS", "POS", "POS"});
    // AssertAnnotations.assertTagset(POS.class, "bosque", bosqueTags, jcas);

    JCas cogrooResult =
        runTest(
            "pt",
            "cogroo",
            sentence,
            new String[] {"pron-det", "v-fin", "artm", "nm", "."},
            new String[] {"POS", "POS", "POS", "POS", "POS"});
    AssertAnnotations.assertTagset(POS.class, tagsetName, expectedTagset, cogrooResult);
  }