예제 #1
0
  /**
   * Lemmatises a single word, or the right-most word of a phrase.
   *
   * <p>When {@code value} contains no space, or ends with {@code " s"} or {@code "'s"}, the whole
   * string is handed to the lemmatiser. Otherwise only the text after the last space is
   * lemmatised and then re-attached, separated by one space, to the unchanged text before it.
   * The lemmatiser output is trimmed in both cases.
   *
   * @param value the word or phrase to normalise; must not be {@code null}
   * @return {@code value} with its final word (or the entire string, see above) replaced by the
   *     trimmed lemmatiser output
   */
  public String normalise(String value) {
    final int lastSpace = value.lastIndexOf(' ');
    final boolean singleWord = lastSpace < 0;
    // "XYZ's" form, including the variant where the ' char has been removed and left " s" behind.
    final boolean possessiveForm = value.endsWith(" s") || value.endsWith("'s");

    if (singleWord || possessiveForm) {
      // NOTE(review): the second argument 1 is presumably a part-of-speech code - confirm
      // against the lemmatizer's API.
      return lemmatizer.lemmatize(value, 1).trim();
    }

    // Multi-word phrase: keep everything before the last space verbatim, lemmatise the rest.
    final String leadingWords = value.substring(0, lastSpace);
    final String lastWord = value.substring(lastSpace + 1);
    final String lastWordLemma = lemmatizer.lemmatize(lastWord, 1);
    return leadingWords + " " + lastWordLemma.trim();
  }
 /**
  * Prints the settings held by this configuration to standard output, one setting per line.
  * Useful for confirming the configuration is as intended before starting a training run.
  *
  * <p>The lemmatiser, POS tagger and post-processor are reported as {@code null} when unset; the
  * tokenizer and tag format are dereferenced without a null check.
  */
 public void log() {
   // Each class name is resolved immediately before its own line is printed, so the lines
   // already written are unaffected if a later component turns out to be missing.
   String lemmatiserClass = null;
   if (lemmatiser != null) {
     lemmatiserClass = lemmatiser.getClass().getName();
   }
   System.out.println("Lemmatiser: " + lemmatiserClass);

   String posTaggerClass = null;
   if (posTagger != null) {
     posTaggerClass = posTagger.getClass().getName();
   }
   System.out.println("POSTagger: " + posTaggerClass);

   String tokenizerClass = tokenizer.getClass().getName();
   System.out.println("Tokenizer: " + tokenizerClass);

   String tagFormatName = tagFormat.name();
   System.out.println("Tag format: " + tagFormatName);

   String postProcessorClass = null;
   if (postProcessor != null) {
     postProcessorClass = postProcessor.getClass().getName();
   }
   System.out.println("PostProcessor: " + postProcessorClass);

   System.out.println("Using numeric normalization: " + useNumericNormalization);
   System.out.println("CRF order is " + order);
   System.out.println("Using feature induction: " + useFeatureInduction);
   System.out.println("Text textDirection: " + textDirection);
 }
예제 #3
0
 /**
  * Lemmatises a word, using its part-of-speech tag to choose the lemmatiser call.
  *
  * <p>The tag is translated to a numeric code through {@code tagLookUp}. A code of {@code 0}, or
  * a tag with no entry in the lookup, falls back to the lemmatiser call that takes no
  * part-of-speech code; any other code is passed through to the lemmatiser.
  *
  * @param value original word
  * @param pos the part of speech of the last word
  * @return the lemma of original word
  */
 public String getLemma(String value, String pos) {
   // Keep the lookup result boxed: unboxing it straight into an int throws a
   // NullPointerException when the lookup has no entry for the tag.
   // NOTE(review): assumes tagLookUp maps tag strings to Integer codes - confirm its declaration.
   Integer mappedCode = tagLookUp.get(pos);
   int posCode = (mappedCode == null) ? 0 : mappedCode;

   if (posCode == 0) {
     return lemmatizer.lemmatize(value);
   }
   return lemmatizer.lemmatize(value, posCode);
 }