Esempio n. 1
0
  /**
   * Builds this profile from a piece of text.
   *
   * <p>If {@code ngrams} is non-null it is cleared and {@code sorted} and
   * {@code ngramcounts} are set to {@code null} before scanning. The text is
   * then read one character at a time, lower-cased. Each letter is appended to
   * the shared {@code word} buffer (which always starts with
   * {@code SEPARATOR}) and the buffer is passed to {@code add} right away. Any
   * non-letter ends the current word: if the buffer holds more than the
   * leading separator, a closing {@code SEPARATOR} is appended, the buffer is
   * passed to {@code add} once more and then reset. A word still pending at
   * the end of the text is closed the same way, and finally
   * {@code normalize()} is called.
   *
   * @param text the text to be analyzed
   */
  public void analyze(StringBuffer text) {

    // Reset the collected state before scanning.
    if (ngrams != null) {
      ngrams.clear();
      sorted = null;
      ngramcounts = null;
    }

    word.clear().append(SEPARATOR);
    for (int pos = 0; pos < text.length(); pos++) {
      char lowered = Character.toLowerCase(text.charAt(pos));

      if (Character.isLetter(lowered)) {
        // still inside a word: extend it and register the new tail n-grams
        add(word.append(lowered));
        continue;
      }

      // non-letter: a word boundary
      if (word.length() <= 1) {
        // buffer holds only the leading separator, nothing to close
        continue;
      }

      // close the word, register it, and start a fresh buffer
      add(word.append(SEPARATOR));
      word.clear().append(SEPARATOR);
    }

    // close a word that runs up to the end of the text
    if (word.length() > 1) {
      add(word.append(SEPARATOR));
    }
    normalize();
  }
Esempio n. 2
0
 /**
  * Adds the trailing n-grams of the specified word.
  *
  * <p>For every length from {@code minLength} up to the smaller of
  * {@code maxLength} and the word's length, the subsequence covering the last
  * that many characters of {@code word} is passed to {@code add}. Nothing is
  * added when the word is shorter than {@code minLength}.
  *
  * @param word the buffer whose tail n-grams are added
  */
 private void add(QuickStringBuffer word) {
   int end = word.length();
   if (end < minLength) {
     // too short for even the smallest n-gram
     return;
   }

   int longest = Math.min(maxLength, end);
   int size = minLength;
   while (size <= longest) {
     add(word.subSequence(end - size, end));
     size++;
   }
 }
Esempio n. 3
0
 /**
  * Adds every subsequence of length {@code n} found in {@code word}, taken at
  * each start offset from the beginning of the word onwards.
  *
  * @param word the buffer to slice
  * @param n sequence length
  */
 private void add(StringBuffer word, int n) {
   int start = 0;
   // the last valid start offset is word.length() - n
   while (start <= word.length() - n) {
     add(word.subSequence(start, start + n));
     start++;
   }
 }
Esempio n. 4
0
 /**
  * Adds n-grams from a single word to this profile.
  *
  * <p>For each length starting at {@code minLength}, while the length does not
  * exceed {@code maxLength} and is strictly less than the word's length, all
  * subsequences of that length are added via {@code add(word, length)}.
  * Because of the strict comparison, a subsequence spanning the whole word is
  * never added by this method.
  *
  * @param word is the word to add
  */
 public void add(StringBuffer word) {
   int size = minLength;
   while (size <= maxLength && size < word.length()) {
     add(word, size);
     size++;
   }
 }
Esempio n. 5
0
 /**
  * Adds n-grams from a token to this profile.
  *
  * <p>The token's term text is wrapped in {@code SEPARATOR} on both sides and
  * the resulting buffer is passed to {@code add(StringBuffer)}.
  *
  * @param t is the Token to be added
  */
 public void add(Token t) {
   StringBuffer delimited = new StringBuffer();
   delimited.append(SEPARATOR);
   delimited.append(t.termText());
   delimited.append(SEPARATOR);
   add(delimited);
 }