/** * Get all the terms in the index with low edit distance from the supplied term * * @param reader the index * @param luceneName the field to search in * @param searchTerms search terms * @param maxEdits maximum edit distance (Levenshtein algorithm) for matches (i.e. lower is more * similar) * @return the set of terms in the index that are close to our search term * @throws BooleanQuery.TooManyClauses if the expansion resulted in too many terms */ public static Set<String> getMatchingTermsFromIndex( IndexReader reader, String luceneName, Collection<String> searchTerms, int maxEdits) { boolean doFuzzy = true; if (maxEdits == 0) { // Exact match; don't use fuzzy query (slow) Set<String> result = new HashSet<>(); try { for (String term : searchTerms) { if (reader.docFreq(new Term(luceneName, term)) > 0) result.add(term); } } catch (IOException e) { throw new RuntimeException(e); } return result; } BooleanQuery q = new BooleanQuery(); for (String s : searchTerms) { FuzzyQuery fq = new FuzzyQuery(new Term(luceneName, s), maxEdits); q.add(fq, Occur.SHOULD); } try { Query rewritten = q.rewrite(reader); WeightedTerm[] wts = QueryTermExtractor.getTerms(rewritten); Set<String> terms = new HashSet<>(); for (WeightedTerm wt : wts) { if (doFuzzy || searchTerms.contains(wt.getTerm())) { terms.add(wt.getTerm()); } } return terms; } catch (IOException e) { throw new RuntimeException(e); } }