/** * This was the method found in the {@link com.cybozu.labs.langdetect.Detector} class, it was used * to extract grams from the to-analyze text. * * <p>NOTE: although it adds the first ngram with space, it does not add the last n-gram with * space. example: "foo" gives " fo" but not "oo "!. It is not clear yet whether this is desired * (and why) or a bug. * * <p>TODO replace this algorithm with a simpler, faster one that uses less memory: only by * position shifting. also, the returned list size can be computed before making it (based on text * length and number of n-grams). * * @author Nakatani Shuyo */ @NotNull @Deprecated public static List<String> extractNGrams(@NotNull CharSequence text, @Nullable Filter filter) { List<String> list = new ArrayList<>(); NGram ngram = new NGram(); for (int i = 0; i < text.length(); ++i) { ngram.addChar(text.charAt(i)); for (int n = 1; n <= NGram.N_GRAM; ++n) { String w = ngram.get(n); if (w != null) { // TODO this null check is ugly if (filter == null || filter.use(w)) { list.add(w); } } } } return list; }