/**
 * Adds the trailing n-grams of the given word.
 *
 * <p>For every length from {@code minLength} up to the smaller of
 * {@code maxLength} and the word's length, the suffix of that length is
 * passed to the overloaded {@code add} method. Words shorter than
 * {@code minLength} contribute nothing.
 *
 * @param word the buffer whose trailing n-grams are added
 */
private void add(QuickStringBuffer word) {
  final int wlen = word.length();
  if (wlen < minLength) {
    // Too short to yield even the smallest n-gram.
    return;
  }

  final int longest = Math.min(maxLength, wlen);
  int size = minLength;
  while (size <= longest) {
    // Suffix made of the last `size` characters of the word.
    add(word.subSequence(wlen - size, wlen));
    size++;
  }
}
/** * Analyze a piece of text * * @param text the text to be analyzed */ public void analyze(StringBuffer text) { if (ngrams != null) { ngrams.clear(); sorted = null; ngramcounts = null; } word.clear().append(SEPARATOR); for (int i = 0; i < text.length(); i++) { char c = Character.toLowerCase(text.charAt(i)); if (Character.isLetter(c)) { add(word.append(c)); } else { // found word boundary if (word.length() > 1) { // we have a word! add(word.append(SEPARATOR)); word.clear().append(SEPARATOR); } } } if (word.length() > 1) { // we have a word! add(word.append(SEPARATOR)); } normalize(); }