Exemplos de AnalysisEngineFactory em Java, exemplos de org.apache.uima.fit.factory.AnalysisEngineFactory em Java

Exemplo n.º 1

0

Exibir arquivo

Arquivo: LuceneNGramMetaCollectorTest.java Projeto: niklas-meyer/dkpro-tc

  /**
   * Pushes the documents matched by {@code empty*.txt} through the segmenter and the Lucene
   * n-gram meta collector. There are no assertions: the test succeeds when the pipeline finishes
   * without throwing.
   */
  @Test
  public void emptyDocumentTest() throws Exception {
    File luceneDir = folder.newFolder();

    CollectionReaderDescription emptyDocsReader =
        CollectionReaderFactory.createReaderDescription(
            TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/empty/",
            TextReader.PARAM_LANGUAGE, "en",
            TextReader.PARAM_PATTERNS, "empty*.txt");
    AnalysisEngineDescription tokenizer =
        AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription ngramCollector =
        AnalysisEngineFactory.createEngineDescription(
            LuceneNGramMetaCollector.class, LuceneNGramDFE.PARAM_LUCENE_DIR, luceneDir);

    // The loop only exists to exhaust the iterable; each CAS is deliberately ignored.
    for (JCas ignored : new JCasIterable(emptyDocsReader, tokenizer, ngramCollector)) {
      // no per-document checks
    }
  }

Exemplo n.º 2

0

Exibir arquivo

Arquivo: UimaSentenceIterator.java Projeto: ozborn/deeplearning4j

 /**
  * Builds a sentence iterator over {@code path}, backed by an engine that aggregates the
  * tokenizer and sentence annotator descriptions.
  *
  * @param path the path to the root directory or file to read from
  * @return the uima sentence iterator for the given root dir or file
  * @throws Exception if the descriptions or the engine cannot be created
  */
 public static SentenceIterator createWithPath(String path) throws Exception {
   UimaResource tokenizeAndSplit =
       new UimaResource(
           AnalysisEngineFactory.createEngine(
               AnalysisEngineFactory.createEngineDescription(
                   TokenizerAnnotator.getDescription(), SentenceAnnotator.getDescription())));
   return new UimaSentenceIterator(path, tokenizeAndSplit);
 }

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ExtendedPipeline.java Projeto: Horsmann/TextAnalyticsPracticalClass

 /**
  * Runs the example pipeline in one call: a {@code ReaderExample} reading
  * {@code src/test/resources/test/input.txt}, followed by the segmenter, the baseline and the
  * evaluator, in that order.
  *
  * @param args command-line arguments (not used)
  * @throws Exception if building or running the pipeline fails
  */
 public static void main(String[] args) throws Exception {
   SimplePipeline.runPipeline(
       CollectionReaderFactory.createReader(
           ReaderExample.class,
           ReaderExample.PARAM_INPUT_FILE,
           "src/test/resources/test/input.txt"),
       AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
       AnalysisEngineFactory.createEngineDescription(BaselineExample.class),
       AnalysisEngineFactory.createEngineDescription(EvaluatorExample.class));
 }

Exemplo n.º 4

0

Exibir arquivo

Arquivo: UimaSentenceIterator.java Projeto: ozborn/deeplearning4j

  /**
   * Returns the shared sentence segmenter, creating it on the first call and reusing it
   * afterwards.
   *
   * @return a sentence segmenter
   * @throws RuntimeException wrapping any exception raised while creating the engine
   */
  public static AnalysisEngine segmenter() {
    if (defaultAnalysisEngine != null) {
      return defaultAnalysisEngine;
    }
    try {
      defaultAnalysisEngine =
          AnalysisEngineFactory.createEngine(
              AnalysisEngineFactory.createEngineDescription(SentenceAnnotator.getDescription()));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return defaultAnalysisEngine;
  }

Exemplo n.º 5

0

Exibir arquivo

Arquivo: WhiteTextCollectionReaderTest.java Projeto: alainloisel/bluima

  /**
   * Reads with a {@code WhiteTextCollectionReader} and feeds the result to an engine created
   * from {@code WhiteTextCollectionReaderTest}.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the reader, the engine or the pipeline run fails
   */
  public static void main(String[] args) throws Exception {
    CollectionReader whiteTextReader = createReader(WhiteTextCollectionReader.class);
    SimplePipeline.runPipeline(
        whiteTextReader,
        AnalysisEngineFactory.createEngine(WhiteTextCollectionReaderTest.class));
  }

Exemplo n.º 6

0

Exibir arquivo

Arquivo: NERTest.java Projeto: habernal/german-ner-test

  /**
   * Runs segmentation, Stanford named entity recognition and a CAS dump writer over a fixed
   * German paragraph, then prints every named entity found.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the CAS or the pipeline cannot be created or run
   */
  public static void main(String[] args) throws Exception {
    String germanText =
        "Die Fossillagerstätte Geiseltal befindet sich im ehemaligen Braunkohlerevier des Geiseltales südlich der Stadt Halle in Sachsen-Anhalt. Sie ist eine bedeutende Fundstelle heute ausgestorbener Pflanzen und Tiere aus der Zeit des Mittleren Eozäns vor 48 bis 41 Millionen Jahren. Im Geiseltal wurde nachweislich seit 1698 erstmals Kohle gefördert, die ersten Fossilien kamen aber erst Anfang des 20. Jahrhunderts eher zufällig zu Tage. Planmäßige wissenschaftliche Ausgrabungen begannen 1925 seitens der Martin-Luther-Universität Halle-Wittenberg. Unterbrochen durch den Zweiten Weltkrieg, können die Untersuchungen in zwei Forschungsphasen untergliedert werden. Aufgrund der zunehmenden Auskohlung der Rohstofflager kamen die Ausgrabungen Mitte der 1980er allmählich zum Erliegen und endeten endgültig zu Beginn des dritten Jahrtausends.";

    JCas document = JCasFactory.createJCas();
    document.setDocumentLanguage("de");
    document.setDocumentText(germanText);

    SimplePipeline.runPipeline(
        document,
        AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
        AnalysisEngineFactory.createEngineDescription(StanfordNamedEntityRecognizer.class),
        AnalysisEngineFactory.createEngineDescription(CasDumpWriter.class));

    // Report each entity as "<value>, <covered text>".
    for (NamedEntity entity : JCasUtil.select(document, NamedEntity.class)) {
      System.out.println("Found NE: " + entity.getValue() + ", " + entity.getCoveredText());
    }
  }

Exemplo n.º 7

0

Exibir arquivo

Arquivo: BookIndexPhraseAggregatorTest.java Projeto: eric011/dkpro-keyphrases

  //	@Test
  /**
   * For each aggregation strategy under test (currently only {@code MaximumAggregation}),
   * processes a single-segment document and checks that the resulting book index phrases are
   * "bar", "foo", "baz", in that order.
   */
  public void allAggregationStrategies_1segment_expectCorrectRanking() throws Exception {
    String text = "foo bar baz";

    List<Class<? extends AggregationStrategy>> strategies =
        new ArrayList<Class<? extends AggregationStrategy>>();
    strategies.add(MaximumAggregation.class);

    for (Class<? extends AggregationStrategy> strategy : strategies) {
      // Build the annotator and bind the strategy under test to it.
      AnalysisEngineDescription description =
          AnalysisEngineFactory.createPrimitiveDescription(
              BookIndexPhraseAggregationAnnotator.class);
      bindResource(description, RankedPhraseAggregationAnnotator.AGGREGATION_STRATEGY, strategy);
      AnalysisEngine annotator = createPrimitive(description);

      JCas cas = setup_1segment(text, annotator);
      annotator.process(cas);

      List<String> expected = new ArrayList<String>();
      expected.add("bar");
      expected.add("foo");
      expected.add("baz");

      List<String> actual = new ArrayList<String>();
      for (BookIndexPhrase phrase : JCasUtil.select(cas, BookIndexPhrase.class)) {
        actual.add(phrase.getPhrase());
      }

      assertEquals(expected, actual);
    }
  }

Exemplo n.º 8

0

Exibir arquivo

Arquivo: MaxentBooleanOutcomeDataWriterTest.java Projeto: alainloisel/cleartk

  /**
   * Writes training data with {@code Test1Annotator}, checks the first four lines of the
   * training file, and then runs {@code Train.main} on the output directory.
   */
  @Test
  public void test1() throws Exception {
    AnalysisEngine dataWriterAnnotator =
        AnalysisEngineFactory.createEngine(
            Test1Annotator.class,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            outputDirectoryName,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MaxentBooleanOutcomeDataWriter.class.getName());

    dataWriterAnnotator.process(jCas);
    dataWriterAnnotator.collectionProcessComplete();

    File trainFile =
        new MaxentBooleanOutcomeClassifierBuilder().getTrainingDataFile(this.outputDirectory);
    String[] lines = FileUtil.loadListOfStrings(trainFile);
    assertEquals("true pos_NN distance=3.0 precision=1.234", lines[0]);
    assertEquals("false name_2PO p's=2.0", lines[1]);
    assertEquals("true null=0.0", lines[2]);
    assertEquals("false A_B_AB", lines[3]);

    // Train on the written data with console output hidden. The finally block guarantees the
    // output is restored even when training throws, so later tests are not silenced.
    HideOutput hider = new HideOutput();
    try {
      Train.main(outputDirectoryName, "10", "1");
    } finally {
      hider.restoreOutput();
    }
  }

Exemplo n.º 9

0

Exibir arquivo

Arquivo: SentenceAnnotator.java Projeto: heisaman/java-deeplearning

 /**
  * Builds the primitive description of this annotator. The sentence model path and the window
  * class names are resolved through {@code ParamUtil.getParameterValue}, with
  * {@code "/models/en-sent.bin"} and {@code null} as the respective fallback arguments.
  *
  * @return the description of the sentence annotator
  * @throws ResourceInitializationException if the description cannot be created
  */
 public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
   AnalysisEngineDescription description =
       AnalysisEngineFactory.createPrimitiveDescription(
           SentenceAnnotator.class,
           PARAM_SENTENCE_MODEL_PATH,
           ParamUtil.getParameterValue(PARAM_SENTENCE_MODEL_PATH, "/models/en-sent.bin"),
           PARAM_WINDOW_CLASS_NAMES,
           ParamUtil.getParameterValue(PARAM_WINDOW_CLASS_NAMES, null));
   return description;
 }

Exemplo n.º 10

0

Exibir arquivo

Arquivo: Baseline.java Projeto: pakchoi/Iyas

  /**
   * Configures this baseline for Arabic (stopwords, pair features, language, labels and analysis
   * engines) and then processes the Arabic train and dev files.
   *
   * <p>A {@code SimilarityException} or {@code IOException} raised while processing is printed
   * and not rethrown.
   *
   * @throws UIMAException if the CAS or one of the analysis engines cannot be created
   */
  public void runForArabic() throws UIMAException {
    // NOTE(review): the first assignment is overwritten right away by the second one; it is kept
    // in case the Stopwords constructor has side effects - confirm and drop if it has none.
    this.stopwords = new Stopwords(Stopwords.STOPWORD_AR);
    this.stopwords = new Stopwords("semeval2015-3/arabic-corpus-specific-stopwords.txt");

    this.pfArabic = new PairFeatureFactoryArabic(this.alphabet);
    this.pfArabic.setupMeasures(RichNode.OUTPUT_PAR_TOKEN_LOWERCASE, this.stopwords);

    this.language = LANG_ARABIC;

    this.preliminaryCas = JCasFactory.createJCas();

    // Task labels: for Arabic there is just one task.
    this.a_labels.add("direct");
    this.a_labels.add("related");
    this.a_labels.add("irrelevant");

    if (USE_QCRI_ALT_TOOLS) {
      // Instantiate the QCRI analyzer with a single Arabic analysis engine.
      this.analyzer = new Analyzer(new UIMANoPersistence());
      this.analyzer.addAE(
          AnalysisEngineFactory.createEngine(createEngineDescription(ArabicAnalyzer.class)));
    } else {
      // Whitespace tokenizer: the Stanford Segmenter for Arabic has a very bad bug and the
      // tokenization is completely wrong.
      AnalysisEngine whitespaceTokenizer =
          createEngine(createEngineDescription(WhitespaceTokenizer.class));

      // Stanford POS tagger, Arabic "accurate" variant.
      AnalysisEngine arabicPosTagger =
          createEngine(
              createEngineDescription(
                  StanfordPosTagger.class,
                  StanfordPosTagger.PARAM_LANGUAGE,
                  "ar",
                  StanfordPosTagger.PARAM_VARIANT,
                  "accurate"));

      // Put the UIMA DKPro annotators together: tokenizer first, tagger second.
      this.analysisEngineList = new AnalysisEngine[] {whitespaceTokenizer, arabicPosTagger};
    }

    // NOTE(review): in the else branch above `analyzer` is not assigned by this method, yet it
    // is passed on below - verify that it is initialised elsewhere.
    try {
      processArabicFile(analyzer, CQA_QL_TRAIN_AR, "train");
      processArabicFile(analyzer, CQA_QL_DEV_AR, "dev");
    } catch (SimilarityException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

Exemplo n.º 11

0

Exibir arquivo

Arquivo: PoStagger.java Projeto: strategist922/Canova

 /**
  * Builds the description of the POS tagger for one language.
  *
  * <p>The model is looked up as a class resource at
  * {@code /models/<languageCode>-pos-maxent.bin}.
  *
  * @param languageCode language code used to select the model file
  * @return the engine description, configured with the model resource, the sentence and token
  *     type names and the {@code "pos"} feature
  * @throws ResourceInitializationException if no model resource exists for
  *     {@code languageCode}, or if the description cannot be created
  */
 public static AnalysisEngineDescription getDescription(String languageCode)
     throws ResourceInitializationException {
   String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);

   // getResource returns null for a missing resource; report which path was missing instead of
   // failing with a message-less NullPointerException on toString().
   java.net.URL modelUrl = PoStagger.class.getResource(modelPath);
   if (modelUrl == null) {
     throw new ResourceInitializationException(
         new IllegalArgumentException("POS model not found on classpath: " + modelPath));
   }

   return AnalysisEngineFactory.createEngineDescription(
       PoStagger.class,
       UimaUtil.MODEL_PARAMETER,
       ExternalResourceFactory.createExternalResourceDescription(
           POSModelResourceImpl.class, modelUrl.toString()),
       UimaUtil.SENTENCE_TYPE_PARAMETER,
       Sentence.class.getName(),
       UimaUtil.TOKEN_TYPE_PARAMETER,
       Token.class.getName(),
       UimaUtil.POS_FEATURE_PARAMETER,
       "pos");
 }

Exemplo n.º 12

0

Exibir arquivo

Arquivo: ExamplePosAnnotatorTest.java Projeto: alainloisel/cleartk

  /**
   * Checks that the writer description carries the default output directory (compared with
   * forward slashes) and {@code ViterbiDataWriterFactory} as its data writer factory.
   */
  @Test
  public void testDataWriterDescriptor() throws UIMAException {
    AnalysisEngine writerEngine =
        AnalysisEngineFactory.createEngine(
            ExamplePosAnnotator.getWriterDescription(ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY));

    // Normalise the platform separator so the comparison also holds on Windows-style paths.
    String configuredDir =
        (String)
            writerEngine.getConfigParameterValue(DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY);
    String normalizedDir = configuredDir.replace(File.separatorChar, '/');
    Assert.assertEquals(ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY, normalizedDir);

    Object configuredFactory =
        writerEngine.getConfigParameterValue(
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME);
    Assert.assertEquals(ViterbiDataWriterFactory.class.getName(), configuredFactory);

    writerEngine.collectionProcessComplete();
  }

Exemplo n.º 13

0

Exibir arquivo

Arquivo: RunModel.java Projeto: alainloisel/cleartk

  /**
   * Classifies the files of the test directory with the models from the models directory and
   * passes the result to {@code PrintClassificationsAnnotator}.
   *
   * @param args command-line arguments, parsed into {@code Options}
   * @throws Exception if argument parsing, pipeline construction or the run itself fails
   */
  public static void main(String[] args) throws Exception {
    Options cli = CliFactory.parseArguments(Options.class, args);

    // Reader over the files to classify.
    List<File> filesToClassify =
        DocumentClassificationEvaluation.getFilesFromDirectory(cli.getTestDirectory());
    DocumentClassificationEvaluation evaluation =
        new DocumentClassificationEvaluation(cli.getModelsDirectory());
    CollectionReader reader = evaluation.getCollectionReader(filesToClassify);

    // Aggregate built in CLASSIFY mode from the same models directory.
    AggregateBuilder classifyingAggregate =
        DocumentClassificationEvaluation.createDocumentClassificationAggregate(
            cli.getModelsDirectory(), AnnotatorMode.CLASSIFY);

    SimplePipeline.runPipeline(
        reader,
        classifyingAggregate.createAggregateDescription(),
        AnalysisEngineFactory.createEngineDescription(PrintClassificationsAnnotator.class));
  }

Exemplo n.º 14

0

Exibir arquivo

Arquivo: ExamplePosAnnotatorTest.java Projeto: alainloisel/cleartk

  /**
   * Builds the example POS model, creates an engine from the classifier description for the
   * resulting model jar, and checks that the engine reports that jar path as its classifier jar.
   */
  @Test
  public void testAnnotatorDescriptor() throws Exception {
    // Build the model with console output hidden. The finally block guarantees the output is
    // restored even when the build throws, so later tests are not silenced.
    HideOutput hider = new HideOutput();
    try {
      BuildTestExamplePosModel.main();
    } finally {
      hider.restoreOutput();
    }

    String modelFileName =
        JarClassifierBuilder.getModelJarFile(ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY)
            .getPath();
    AnalysisEngineDescription posTaggerDescription =
        ExamplePosAnnotator.getClassifierDescription(modelFileName);
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(posTaggerDescription);

    Object classifierJar =
        engine.getConfigParameterValue(GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH);
    Assert.assertEquals(modelFileName, classifierJar);

    engine.collectionProcessComplete();
  }

Exemplo n.º 15

0

Exibir arquivo

Arquivo: OpenNLPParserTest.java Projeto: RokeManorResearch/baleen

  /**
   * Creates the engines under test: an {@code OpenNLP} engine followed by an
   * {@code OpenNLPParser} engine, each wired to {@code SharedOpenNLPModel} resources.
   *
   * @return the two engines, in execution order
   * @throws ResourceInitializationException if a resource description or engine cannot be created
   */
  @Override
  protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

    final ExternalResourceDescription parserChunkingDesc =
        ExternalResourceFactory.createExternalResourceDescription(
            "parserChunking", SharedOpenNLPModel.class);

    // Add in the OpenNLP implementation too, as it is a prerequisite. In theory OpenNLPParser
    // should be tested in isolation, but in practice it has OpenNLP as a dependency, so it is
    // better to test that they work together.
    final ExternalResourceDescription tokensDesc =
        ExternalResourceFactory.createExternalResourceDescription(
            "tokens", SharedOpenNLPModel.class);
    final ExternalResourceDescription sentencesDesc =
        ExternalResourceFactory.createExternalResourceDescription(
            "sentences", SharedOpenNLPModel.class);
    final ExternalResourceDescription posDesc =
        ExternalResourceFactory.createExternalResourceDescription(
            "posTags", SharedOpenNLPModel.class);
    final ExternalResourceDescription chunksDesc =
        ExternalResourceFactory.createExternalResourceDescription(
            "phraseChunks", SharedOpenNLPModel.class);

    return asArray(
        createAnalysisEngine(
            OpenNLP.class,
            "tokens",
            tokensDesc,
            "sentences",
            sentencesDesc,
            "posTags",
            posDesc,
            "phraseChunks",
            chunksDesc),
        createAnalysisEngine(OpenNLPParser.class, "parserChunking", parserChunkingDesc));
  }

Exemplo n.º 16

0

Exibir arquivo

Arquivo: FeatureValueReplacerTest.java Projeto: textocat/textokit-core

  /**
   * Checks the replacer against two source URIs: a {@code file:} URI that matches the pattern
   * and is reduced to its last path segment, and an {@code http:} URI that is left unchanged.
   */
  @Test
  public void test() throws UIMAException {
    AnalysisEngine replacer =
        AnalysisEngineFactory.createEngine(
            FeatureValueReplacer.class,
            tsd,
            FeatureValueReplacer.PARAM_ANNO_TYPE, DocumentMetadata.class.getName(),
            FeatureValueReplacer.PARAM_FEATURE_PATH, "sourceUri",
            FeatureValueReplacer.PARAM_PATTERN, "file:.+/([^/]+)$",
            FeatureValueReplacer.PARAM_REPLACE_BY, "$1");

    // First trial: the file URI matches, so only the captured file name remains.
    JCas fileCas = replacer.newCAS().getJCas();
    fileCas.setDocumentText("Bla bla");
    DocumentMetadata fileMeta = new DocumentMetadata(fileCas);
    fileMeta.setBegin(0);
    fileMeta.setEnd(0);
    fileMeta.setSourceUri("file:/d:/somefolder/somemore/foobar.txt");
    fileMeta.addToIndexes();

    replacer.process(fileCas);

    DocumentMetadata fileResult =
        (DocumentMetadata) fileCas.getAnnotationIndex(DocumentMetadata.type).iterator().next();
    assertEquals("foobar.txt", fileResult.getSourceUri());

    // Second trial: the http URI must come out as it went in.
    JCas httpCas = replacer.newCAS().getJCas();
    httpCas.setDocumentText("Bla bla more");
    DocumentMetadata httpMeta = new DocumentMetadata(httpCas);
    httpMeta.setBegin(0);
    httpMeta.setEnd(0);
    httpMeta.setSourceUri("http://example.org/qwerty.txt");
    httpMeta.addToIndexes();

    replacer.process(httpCas);

    DocumentMetadata httpResult =
        (DocumentMetadata) httpCas.getAnnotationIndex(DocumentMetadata.type).iterator().next();
    assertEquals("http://example.org/qwerty.txt", httpResult.getSourceUri());
  }

Exemplo n.º 17

0

Exibir arquivo

Arquivo: MaxentBooleanOutcomeDataWriterTest.java Projeto: alainloisel/cleartk

  /**
   * Here we test that an {@code AnalysisEngineProcessException} is thrown when an instance with
   * no outcome is processed.
   */
  @Test
  public void test4() throws Exception {

    HideOutput hider = new HideOutput();
    try {
      AnalysisEngine dataWriterAnnotator =
          AnalysisEngineFactory.createEngine(
              Test4Annotator.class,
              DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
              outputDirectoryName,
              DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
              MaxentBooleanOutcomeDataWriter.class.getName());

      // Capture the exception instead of letting it escape, so that the engine can still be
      // completed before the assertion runs.
      AnalysisEngineProcessException aepe = null;
      try {
        dataWriterAnnotator.process(jCas);
      } catch (AnalysisEngineProcessException e) {
        aepe = e;
      }
      dataWriterAnnotator.collectionProcessComplete();
      assertNotNull(aepe);
    } finally {
      // Restore console output even when engine creation or the assertion fails, so later
      // tests are not silenced.
      hider.restoreOutput();
    }
  }

Exemplo n.º 18

0

Exibir arquivo

Arquivo: ParentheticalAnnotator.java Projeto: alainloisel/cleartk

  /**
   * Builds a description of this annotator for the given parenthesis characters.
   *
   * @param parentheticalClass annotation class whose name is passed as the parenthetical type
   * @param windowClass annotation class whose name is passed as the window type; when
   *     {@code null} no window type parameter is added
   * @param leftParen opening character, passed as a one-character string
   * @param rightParen closing character, passed as a one-character string
   * @return the configured engine description
   * @throws ResourceInitializationException if the description cannot be created
   */
  public static AnalysisEngineDescription getDescription(
      Class<? extends Annotation> parentheticalClass,
      Class<? extends Annotation> windowClass,
      char leftParen,
      char rightParen)
      throws ResourceInitializationException {
    AnalysisEngineDescription description =
        AnalysisEngineFactory.createEngineDescription(
            ParentheticalAnnotator.class,
            PARAM_LEFT_PARENTHESIS, String.valueOf(leftParen),
            PARAM_RIGHT_PARENTHESIS, String.valueOf(rightParen),
            PARAM_PARENTHETICAL_TYPE_NAME, parentheticalClass.getName());

    if (windowClass == null) {
      return description;
    }

    ConfigurationParameterFactory.addConfigurationParameters(
        description, PARAM_WINDOW_TYPE_NAME, windowClass.getName());
    return description;
  }

Exemplo n.º 19

0

Exibir arquivo

Arquivo: TempEval2007Writer.java Projeto: alainloisel/cleartk

 /**
  * Builds a description of this writer with the given output directory.
  *
  * @param outputDir value passed as {@code PARAM_OUTPUT_DIRECTORY_NAME}
  * @return the configured engine description
  * @throws ResourceInitializationException if the description cannot be created
  */
 public static AnalysisEngineDescription getDescription(String outputDir)
     throws ResourceInitializationException {
   AnalysisEngineDescription writerDescription =
       AnalysisEngineFactory.createEngineDescription(
           TempEval2007Writer.class, PARAM_OUTPUT_DIRECTORY_NAME, outputDir);
   return writerDescription;
 }

Exemplo n.º 20

0

Exibir arquivo

Arquivo: TokenizerAPI.java Projeto: CLLKazan/UIMA-Ext

 /**
  * Creates the tokenizer engine description from {@code AE_TOKENIZER}.
  *
  * @return AE description instance
  * @throws UIMAException if the description cannot be created
  * @throws IOException if the description cannot be read
  */
 public static AnalysisEngineDescription getAEDescription() throws UIMAException, IOException {
   AnalysisEngineDescription tokenizerDescription =
       AnalysisEngineFactory.createEngineDescription(AE_TOKENIZER);
   return tokenizerDescription;
 }

Exemplo n.º 21

0

Exibir arquivo

Arquivo: LuceneNGramMetaCollectorTest.java Projeto: niklas-meyer/dkpro-tc

  /**
   * Runs the n-gram meta collector over the {@code text*.txt} test documents and then inspects
   * the Lucene index it wrote: the n-gram field must contain 35 terms, and the term "this" must
   * have a document frequency of 2 and a total term frequency of 3.
   */
  @Test
  public void luceneNgramMetaCollectorTest() throws Exception {
    File tmpDir = folder.newFolder();

    CollectionReaderDescription reader =
        CollectionReaderFactory.createReaderDescription(
            TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION,
            "src/test/resources/data/",
            TextReader.PARAM_LANGUAGE,
            "en",
            TextReader.PARAM_PATTERNS,
            "text*.txt");

    AnalysisEngineDescription segmenter =
        AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);

    AnalysisEngineDescription metaCollector =
        AnalysisEngineFactory.createEngineDescription(
            LuceneNGramMetaCollector.class, LuceneNGramDFE.PARAM_LUCENE_DIR, tmpDir);

    // The loop only exists to exhaust the iterable; each CAS is deliberately ignored.
    for (JCas jcas : new JCasIterable(reader, segmenter, metaCollector)) {
      // no per-document checks
    }

    int i = 0;
    IndexReader index = null;
    try {
      index = DirectoryReader.open(FSDirectory.open(tmpDir));
      Fields fields = MultiFields.getFields(index);
      if (fields != null) {
        Terms terms = fields.terms(LuceneNGramDFE.LUCENE_NGRAM_FIELD);
        if (terms != null) {
          TermsEnum termsEnum = terms.iterator(null);
          BytesRef text = null;
          while ((text = termsEnum.next()) != null) {
            if (text.utf8ToString().equals("this")) {
              assertEquals(2, termsEnum.docFreq());
              assertEquals(3, termsEnum.totalTermFreq());
            }

            i++;
          }
        }
      }
    } catch (Exception e) {
      throw new ResourceInitializationException(e);
    } finally {
      // Release the reader whether or not the checks above succeeded.
      if (index != null) {
        index.close();
      }
    }

    assertEquals(35, i);
  }

Exemplo n.º 22

0

Exibir arquivo

Arquivo: ExamplePosAnnotatorTest.java Projeto: alainloisel/cleartk

  /**
   * Feeds the tagged sentence "The Absurdis retreated in 2003." to the POS annotator and checks,
   * token by token, the collected feature values and the outcome.
   *
   * <p>Each expected list is ordered as: word, lower case, capital type, numeric type, last 2
   * chars, last 3 chars, left 2 words, right 2 words. A slot with no value for a token is simply
   * absent from that token's list.
   */
  @Test
  public void testSimpleSentence() throws Exception {
    AnalysisEngine posAnnotator =
        AnalysisEngineFactory.createEngine(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            PublicFieldSequenceDataWriter.StringFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            ".");

    // create some tokens with part of speech tags
    tokenBuilder.buildTokens(
        jCas,
        "The Absurdis retreated in 2003.",
        "The Absurdis retreated in 2003 .", // the tokenized version of the text
        "DT NNP VBD IN CD .");

    List<Instance<String>> collected =
        PublicFieldSequenceDataWriter.StringFactory.collectInstances(posAnnotator, jCas);

    // token 0: "The"
    List<String> expectedThe =
        Arrays.asList(
            "The", // word
            "the", // lower case
            "INITIAL_UPPERCASE", // capital type
            "he", // last 2 chars
            "The", // last 3 chars
            "OOB2", "OOB1", // left 2 words
            "Absurdis", "retreated"); // right 2 words
    Assert.assertEquals(expectedThe, this.getFeatureValues(collected.get(0)));
    Assert.assertEquals("DT", collected.get(0).getOutcome());

    // token 1: "Absurdis"
    List<String> expectedAbsurdis =
        Arrays.asList(
            "Absurdis", // word
            "absurdis", // lower case
            "INITIAL_UPPERCASE", // capital type
            "is", // last 2 chars
            "dis", // last 3 chars
            "OOB1", "The", // left 2 words
            "retreated", "in"); // right 2 words
    Assert.assertEquals(expectedAbsurdis, this.getFeatureValues(collected.get(1)));
    Assert.assertEquals("NNP", collected.get(1).getOutcome());

    // token 2: "retreated"
    List<String> expectedRetreated =
        Arrays.asList(
            "retreated", // word
            "retreated", // lower case
            "ALL_LOWERCASE", // capital type
            "ed", // last 2 chars
            "ted", // last 3 chars
            "The", "Absurdis", // left 2 words
            "in", "2003"); // right 2 words
    Assert.assertEquals(expectedRetreated, this.getFeatureValues(collected.get(2)));
    Assert.assertEquals("VBD", collected.get(2).getOutcome());

    // token 3: "in" (no "last 3 chars" entry)
    List<String> expectedIn =
        Arrays.asList(
            "in", // word
            "in", // lower case
            "ALL_LOWERCASE", // capital type
            "in", // last 2 chars
            "Absurdis", "retreated", // left 2 words
            "2003", "."); // right 2 words
    Assert.assertEquals(expectedIn, this.getFeatureValues(collected.get(3)));
    Assert.assertEquals("IN", collected.get(3).getOutcome());

    // token 4: "2003" (numeric type instead of capital type)
    List<String> expectedYear =
        Arrays.asList(
            "2003", // word
            "2003", // lower case
            "YEAR_DIGITS", // numeric type
            "03", // last 2 chars
            "003", // last 3 chars
            "retreated", "in", // left 2 words
            ".", "OOB1"); // right 2 words
    Assert.assertEquals(expectedYear, this.getFeatureValues(collected.get(4)));
    Assert.assertEquals("CD", collected.get(4).getOutcome());

    // token 5: "." (only word, lower case and context words)
    List<String> expectedPeriod =
        Arrays.asList(
            ".", // word
            ".", // lower case
            "in", "2003", // left 2 words
            "OOB1", "OOB2"); // right 2 words
    Assert.assertEquals(expectedPeriod, this.getFeatureValues(collected.get(5)));
    Assert.assertEquals(".", collected.get(5).getOutcome());
  }