/**
 * Prints every setting held by this configuration to standard output, one per line. Handy for
 * confirming that the configuration looks as intended before starting a training run.
 *
 * <p>The lemmatiser, POS tagger and post-processor are reported as {@code null} when unset;
 * the tokenizer and tag format are dereferenced without a null check.
 */
public void log() {
  // Optional components: report the implementing class name, or null when absent.
  String lemmatiserName = null;
  if (lemmatiser != null) {
    lemmatiserName = lemmatiser.getClass().getName();
  }
  System.out.println("Lemmatiser: " + lemmatiserName);

  String posTaggerName = null;
  if (posTagger != null) {
    posTaggerName = posTagger.getClass().getName();
  }
  System.out.println("POSTagger: " + posTaggerName);

  String tokenizerName = tokenizer.getClass().getName();
  System.out.println("Tokenizer: " + tokenizerName);

  String tagFormatName = tagFormat.name();
  System.out.println("Tag format: " + tagFormatName);

  String postProcessorName = null;
  if (postProcessor != null) {
    postProcessorName = postProcessor.getClass().getName();
  }
  System.out.println("PostProcessor: " + postProcessorName);

  // Remaining settings are printed via plain string concatenation.
  System.out.println("Using numeric normalization: " + useNumericNormalization);
  System.out.println("CRF order is " + order);
  System.out.println("Using feature induction: " + useFeatureInduction);
  System.out.println("Text textDirection: " + textDirection);
}
/**
 * Splits {@code input} into tokens using the configured tokenizer and replaces each token with
 * the result of {@code transform}.
 *
 * <p>The list returned by {@code tokenizer.getTokens(input)} is modified in place and is the
 * same list instance that this method returns.
 *
 * @param input the text to tokenize; must not be {@code null}
 * @return the tokenizer's list, with every element replaced by its transformed form
 * @throws IllegalArgumentException if {@code input} is {@code null}
 */
protected List<String> process(String input) {
  if (input == null) {
    // Same exception type as before; the message makes the failure diagnosable.
    throw new IllegalArgumentException("input must not be null");
  }
  List<String> tokens = tokenizer.getTokens(input);
  for (int i = 0; i < tokens.size(); i++) {
    tokens.set(i, transform(tokens.get(i)));
  }
  return tokens;
}