/**
 * Appends a textual description of a fuzzy query to {@code builder}.
 *
 * <p>The output is the query's term followed by {@code ,prefixLength=<n>} and
 * {@code ,minSimilarity=<f>}, written directly after whatever {@code builder} already holds.
 *
 * @param builder destination buffer; mutated in place
 * @param query fuzzy query whose term, prefix length and minimum similarity are written
 */
private static void planFuzzyQuery(final StringBuilder builder, final FuzzyQuery query) {
  builder
      .append(query.getTerm())
      .append(",prefixLength=")
      .append(query.getPrefixLength())
      .append(",minSimilarity=")
      .append(query.getMinSimilarity());
}
/**
 * Translates one Lucene sub-query into an {@code HTerm} and appends it to {@code hQuery.terms}.
 *
 * <p>Supported query types are {@code TermQuery}, {@code FuzzyQuery} and {@code TermRangeQuery}.
 * The should/must flags and the sub-query's boost are copied onto the term in every case.
 *
 * <p>The term is added to {@code hQuery.terms} only after it has been fully populated, so an
 * unsupported sub-query leaves {@code hQuery} untouched when the exception propagates.
 *
 * @param hQuery query whose {@code terms} collection receives the new term
 * @param isShould value copied to {@code HTerm.isShould}
 * @param isMust value copied to {@code HTerm.isMust}
 * @param subQueryL the Lucene sub-query to translate
 * @throws FederatedSearchException if {@code subQueryL} is of a type this method does not handle
 */
private final void populateTerm(
    final HQuery hQuery, final boolean isShould, final boolean isMust, final Query subQueryL)
    throws FederatedSearchException {
  HTerm hTerm = new HTerm();
  hTerm.isShould = isShould;
  hTerm.isMust = isMust;
  hTerm.boost = subQueryL.getBoost();

  if (subQueryL instanceof TermQuery) {
    TermQuery lTerm = (TermQuery) subQueryL;
    hTerm.type = lTerm.getTerm().field();
    hTerm.text = lTerm.getTerm().text();
  } else if (subQueryL instanceof FuzzyQuery) {
    FuzzyQuery lTerm = (FuzzyQuery) subQueryL;
    hTerm.isFuzzy = true;
    hTerm.type = lTerm.getTerm().field();
    hTerm.text = lTerm.getTerm().text();
  } else if (subQueryL instanceof TermRangeQuery) {
    TermRangeQuery lTerm = (TermRangeQuery) subQueryL;
    hTerm.isFuzzy = false;
    hTerm.type = lTerm.getField();
    // Range terms carry bounds instead of a single text value.
    hTerm.minRange = lTerm.getLowerTerm();
    hTerm.maxRange = lTerm.getUpperTerm();
  } else {
    throw new FederatedSearchException(
        "HQueryParser: Not Implemented Query :" + subQueryL.getClass().toString());
  }

  // Register last: a rejected query type must not leave a half-built term behind.
  hQuery.terms.add(hTerm);
}
/**
 * Replays the expectations recorded in the classpath resource {@code fuzzyTestData.txt} against a
 * freshly built index.
 *
 * <p>Layout of the resource as consumed here: the first line is a bit count {@code bits}; the
 * index then receives {@code 2^bits} documents whose single field value is
 * {@code mapInt(codePointTable, i)}. Each following record starts with a line
 * {@code query,prefix,pqSize,minScore}, then a line holding the expected hit count, then one
 * {@code doc,score} line per expected hit.
 *
 * <p>The resource reader is closed even when an assertion fails, and a missing resource is
 * reported explicitly instead of surfacing as a {@code NullPointerException}.
 *
 * @param codePointTable table handed to {@code mapInt} to turn integers into term text
 * @throws Exception on I/O errors, malformed test data, or a missing resource
 */
public void assertFromTestData(int codePointTable[]) throws Exception {
  InputStream stream = getClass().getResourceAsStream("fuzzyTestData.txt");
  if (stream == null) {
    throw new IllegalStateException("test resource fuzzyTestData.txt not found on the classpath");
  }
  BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
  try {
    int bits = Integer.parseInt(reader.readLine());
    int terms = (int) Math.pow(2, bits);

    Directory dir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random,
            dir,
            newIndexWriterConfig(
                    TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false))
                .setMergePolicy(newLogMergePolicy()));

    // One Document/Field instance is reused; only the field value changes per added document.
    Document doc = new Document();
    Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED);
    doc.add(field);

    for (int i = 0; i < terms; i++) {
      field.setValue(mapInt(codePointTable, i));
      writer.addDocument(doc);
    }

    IndexReader r = writer.getReader();
    IndexSearcher searcher = newSearcher(r);
    writer.close();

    String line;
    while ((line = reader.readLine()) != null) {
      String params[] = line.split(",");
      String query = mapInt(codePointTable, Integer.parseInt(params[0]));
      int prefix = Integer.parseInt(params[1]);
      int pqSize = Integer.parseInt(params[2]);
      float minScore = Float.parseFloat(params[3]);
      FuzzyQuery q = new FuzzyQuery(new Term("field", query), minScore, prefix);
      q.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));

      int expectedResults = Integer.parseInt(reader.readLine());
      TopDocs docs = searcher.search(q, expectedResults);
      assertEquals(expectedResults, docs.totalHits);
      for (int i = 0; i < expectedResults; i++) {
        String scoreDoc[] = reader.readLine().split(",");
        assertEquals(Integer.parseInt(scoreDoc[0]), docs.scoreDocs[i].doc);
        assertEquals(Float.parseFloat(scoreDoc[1]), docs.scoreDocs[i].score, epsilon);
      }
    }

    searcher.close();
    r.close();
    dir.close();
  } finally {
    // Closing the reader also closes the underlying classpath stream.
    reader.close();
  }
}
/**
 * Builds the JSON form of a fuzzy query.
 *
 * <p>The produced object has the shape
 * {@code {"fuzzy": {<field>: {"value", "fuzziness", "prefix_length", "boost"}}}}, filled from the
 * query's term text, max edits, prefix length and boost. The result is then passed through
 * {@code wrapQueryForNestedIfRequired} together with the field name.
 *
 * @param query the fuzzy query to convert
 * @return whatever {@code wrapQueryForNestedIfRequired} returns for the field and the built object
 */
private static JsonObject convertFuzzyQuery(FuzzyQuery query) {
  final String fieldName = query.getTerm().field();
  final String termText = query.getTerm().text();

  final JsonObject fuzzyClause =
      JsonBuilder.object()
          .add(
              "fuzzy",
              JsonBuilder.object()
                  .add(
                      fieldName,
                      JsonBuilder.object()
                          .addProperty("value", termText)
                          .addProperty("fuzziness", query.getMaxEdits())
                          .addProperty("prefix_length", query.getPrefixLength())
                          .addProperty("boost", query.getBoost())))
          .build();

  return wrapQueryForNestedIfRequired(fieldName, fuzzyClause);
}
public void testWildcard() throws Exception { assertQueryEquals("term*", null, "term*"); assertQueryEquals("term*^2", null, "term*^2.0"); assertQueryEquals("term~", null, "term~0.5"); assertQueryEquals("term~0.7", null, "term~0.7"); assertQueryEquals("term~^2", null, "term~0.5^2.0"); assertQueryEquals("term^2~", null, "term~0.5^2.0"); assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "term*germ^3.0"); assertTrue(getQuery("term*", null) instanceof PrefixQuery); assertTrue(getQuery("term*^2", null) instanceof PrefixQuery); assertTrue(getQuery("term~", null) instanceof FuzzyQuery); assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery); FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null); assertEquals(0.7f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); fq = (FuzzyQuery) getQuery("term~", null); assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); assertQueryNodeException("term~1.1"); // value > 1, throws exception assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); /* * Tests to see that wild card terms are (or are not) properly lower-cased * with propery parser configuration */ // First prefix queries: // by default, convert to lowercase: assertWildcardQueryEquals("Term*", true, "term*"); // explicitly set lowercase: assertWildcardQueryEquals("term*", true, "term*"); assertWildcardQueryEquals("Term*", true, "term*"); assertWildcardQueryEquals("TERM*", true, "term*"); // explicitly disable lowercase conversion: assertWildcardQueryEquals("term*", false, "term*"); assertWildcardQueryEquals("Term*", false, "Term*"); assertWildcardQueryEquals("TERM*", false, "TERM*"); // Then 'full' wildcard queries: // by default, convert to lowercase: assertWildcardQueryEquals("Te?m", "te?m"); // explicitly set lowercase: assertWildcardQueryEquals("te?m", true, "te?m"); assertWildcardQueryEquals("Te?m", true, 
"te?m"); assertWildcardQueryEquals("TE?M", true, "te?m"); assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ"); // explicitly disable lowercase conversion: assertWildcardQueryEquals("te?m", false, "te?m"); assertWildcardQueryEquals("Te?m", false, "Te?m"); assertWildcardQueryEquals("TE?M", false, "TE?M"); assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM"); // Fuzzy queries: assertWildcardQueryEquals("Term~", "term~0.5"); assertWildcardQueryEquals("Term~", true, "term~0.5"); assertWildcardQueryEquals("Term~", false, "Term~0.5"); // Range queries: // TODO: implement this on QueryParser // Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO // C]': Lexical error at line 1, column 1. Encountered: "[" (91), after // : "" assertWildcardQueryEquals("[A TO C]", "[a TO c]"); assertWildcardQueryEquals("[A TO C]", true, "[a TO c]"); assertWildcardQueryEquals("[A TO C]", false, "[A TO C]"); // Test suffix queries: first disallow try { assertWildcardQueryEquals("*Term", true, "*term"); fail(); } catch (QueryNodeException pe) { // expected exception } try { assertWildcardQueryEquals("?Term", true, "?term"); fail(); } catch (QueryNodeException pe) { // expected exception } // Test suffix queries: then allow assertWildcardQueryEquals("*Term", true, "*term", true); assertWildcardQueryEquals("?Term", true, "?term", true); }
/**
 * Extracts all MultiTermQueries for {@code field}, and returns equivalent automata that will
 * match terms.
 *
 * <p>Container queries (boolean, constant-score, disjunction-max and the span variants handled
 * below) are walked recursively. Leaf queries of type {@code AutomatonQuery}, {@code PrefixQuery},
 * {@code FuzzyQuery} and {@code TermRangeQuery} each contribute one automaton, but only when
 * their field equals {@code field}. Any other query type contributes nothing.
 *
 * <p>Every automaton created here overrides {@code toString()} to return the string form of the
 * query it was derived from.
 *
 * @param query the query to inspect
 * @param field the field whose multi-term queries are wanted
 * @return the collected automata; an empty array when nothing matched
 */
static CharacterRunAutomaton[] extractAutomata(Query query, String field) {
  List<CharacterRunAutomaton> list = new ArrayList<>();
  if (query instanceof BooleanQuery) {
    // Recurse into every clause except prohibited ones.
    for (BooleanClause clause : (BooleanQuery) query) {
      if (!clause.isProhibited()) {
        list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
      }
    }
  } else if (query instanceof ConstantScoreQuery) {
    // Unwrap and inspect the wrapped query.
    list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field)));
  } else if (query instanceof DisjunctionMaxQuery) {
    for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
      list.addAll(Arrays.asList(extractAutomata(sub, field)));
    }
  } else if (query instanceof SpanOrQuery) {
    for (Query sub : ((SpanOrQuery) query).getClauses()) {
      list.addAll(Arrays.asList(extractAutomata(sub, field)));
    }
  } else if (query instanceof SpanNearQuery) {
    for (Query sub : ((SpanNearQuery) query).getClauses()) {
      list.addAll(Arrays.asList(extractAutomata(sub, field)));
    }
  } else if (query instanceof SpanNotQuery) {
    // Only the include side is inspected; the exclude side is not visited.
    list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field)));
  } else if (query instanceof SpanPositionCheckQuery) {
    list.addAll(
        Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
  } else if (query instanceof SpanMultiTermQueryWrapper) {
    // Inspect the multi-term query held by the span wrapper.
    list.addAll(
        Arrays.asList(
            extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
  } else if (query instanceof AutomatonQuery) {
    final AutomatonQuery aq = (AutomatonQuery) query;
    if (aq.getField().equals(field)) {
      // The query already carries an automaton; wrap it as-is.
      list.add(
          new CharacterRunAutomaton(aq.getAutomaton()) {
            @Override
            public String toString() {
              return aq.toString();
            }
          });
    }
  } else if (query instanceof PrefixQuery) {
    final PrefixQuery pq = (PrefixQuery) query;
    Term prefix = pq.getPrefix();
    if (prefix.field().equals(field)) {
      // Automaton = the literal prefix text concatenated with "any string".
      list.add(
          new CharacterRunAutomaton(
              Operations.concatenate(
                  Automata.makeString(prefix.text()), Automata.makeAnyString())) {
            @Override
            public String toString() {
              return pq.toString();
            }
          });
    }
  } else if (query instanceof FuzzyQuery) {
    final FuzzyQuery fq = (FuzzyQuery) query;
    if (fq.getField().equals(field)) {
      // Decode the UTF-16 term text into an array of Unicode code points.
      String utf16 = fq.getTerm().text();
      int termText[] = new int[utf16.codePointCount(0, utf16.length())];
      for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
        termText[j++] = cp = utf16.codePointAt(i);
      }
      int termLength = termText.length;
      // Cap the prefix length at the term length (both counted in code points).
      int prefixLength = Math.min(fq.getPrefixLength(), termLength);
      // The Levenshtein automaton is built over the part of the term after the prefix.
      String suffix =
          UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
      LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
      String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
      // NOTE(review): presumably yields "prefix, then anything within maxEdits of suffix" -
      // confirm against the LevenshteinAutomata.toAutomaton documentation.
      Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
      list.add(
          new CharacterRunAutomaton(automaton) {
            @Override
            public String toString() {
              return fq.toString();
            }
          });
    }
  } else if (query instanceof TermRangeQuery) {
    final TermRangeQuery tq = (TermRangeQuery) query;
    if (tq.getField().equals(field)) {
      // Convert each bound to chars; a null bound is kept as null and skipped in run() below.
      final CharsRef lowerBound;
      if (tq.getLowerTerm() == null) {
        lowerBound = null;
      } else {
        lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
      }
      final CharsRef upperBound;
      if (tq.getUpperTerm() == null) {
        upperBound = null;
      } else {
        upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
      }
      final boolean includeLower = tq.includesLower();
      final boolean includeUpper = tq.includesUpper();
      // NOTE(review): one scratch instance is shared by every run() call on the object added
      // below and is mutated on each call - looks unsafe for concurrent use; confirm with callers.
      final CharsRef scratch = new CharsRef();
      final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();

      // this is *not* an automaton, but it's very simple
      // The superclass is given an empty automaton; matching is done by the run() override.
      list.add(
          new CharacterRunAutomaton(Automata.makeEmpty()) {
            @Override
            public boolean run(char[] s, int offset, int length) {
              // Point scratch at the caller's buffer; no characters are copied.
              scratch.chars = s;
              scratch.offset = offset;
              scratch.length = length;
              if (lowerBound != null) {
                int cmp = comparator.compare(scratch, lowerBound);
                // Below the lower bound, or equal to an exclusive lower bound.
                if (cmp < 0 || (!includeLower && cmp == 0)) {
                  return false;
                }
              }
              if (upperBound != null) {
                int cmp = comparator.compare(scratch, upperBound);
                // Above the upper bound, or equal to an exclusive upper bound.
                if (cmp > 0 || (!includeUpper && cmp == 0)) {
                  return false;
                }
              }
              return true;
            }

            @Override
            public String toString() {
              return tq.toString();
            }
          });
    }
  }
  return list.toArray(new CharacterRunAutomaton[list.size()]);
}
public void testWildcard() throws Exception { assertQueryEquals("term*", null, "term*"); assertQueryEquals("term*^2", null, "term*^2.0"); assertQueryEquals("term~", null, "term~0.5"); assertQueryEquals("term~0.7", null, "term~0.7"); assertQueryEquals("term~^2", null, "term~0.5^2.0"); assertQueryEquals("term^2~", null, "term~0.5^2.0"); assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "term*germ^3.0"); assertTrue(getQuery("term*", null) instanceof PrefixQuery); assertTrue(getQuery("term*^2", null) instanceof PrefixQuery); assertTrue(getQuery("term~", null) instanceof FuzzyQuery); assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery); FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null); assertEquals(0.7f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); fq = (FuzzyQuery) getQuery("term~", null); assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); assertParseException("term~1.1"); // value > 1, throws exception assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); /* Tests to see that wild card terms are (or are not) properly * lower-cased with propery parser configuration */ // First prefix queries: // by default, convert to lowercase: assertWildcardQueryEquals("Term*", true, "term*"); // explicitly set lowercase: assertWildcardQueryEquals("term*", true, "term*"); assertWildcardQueryEquals("Term*", true, "term*"); assertWildcardQueryEquals("TERM*", true, "term*"); // explicitly disable lowercase conversion: assertWildcardQueryEquals("term*", false, "term*"); assertWildcardQueryEquals("Term*", false, "Term*"); assertWildcardQueryEquals("TERM*", false, "TERM*"); // Then 'full' wildcard queries: // by default, convert to lowercase: assertWildcardQueryEquals("Te?m", "te?m"); // explicitly set lowercase: assertWildcardQueryEquals("te?m", true, "te?m"); assertWildcardQueryEquals("Te?m", true, 
"te?m"); assertWildcardQueryEquals("TE?M", true, "te?m"); assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ"); // explicitly disable lowercase conversion: assertWildcardQueryEquals("te?m", false, "te?m"); assertWildcardQueryEquals("Te?m", false, "Te?m"); assertWildcardQueryEquals("TE?M", false, "TE?M"); assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM"); // Fuzzy queries: assertWildcardQueryEquals("Term~", "term~0.5"); assertWildcardQueryEquals("Term~", true, "term~0.5"); assertWildcardQueryEquals("Term~", false, "Term~0.5"); // Range queries: assertWildcardQueryEquals("[A TO C]", "[a TO c]"); assertWildcardQueryEquals("[A TO C]", true, "[a TO c]"); assertWildcardQueryEquals("[A TO C]", false, "[A TO C]"); // Test suffix queries: first disallow try { assertWildcardQueryEquals("*Term", true, "*term"); fail(); } catch (ParseException pe) { // expected exception } try { assertWildcardQueryEquals("?Term", true, "?term"); fail(); } catch (ParseException pe) { // expected exception } // Test suffix queries: then allow assertWildcardQueryEquals("*Term", true, "*term", true); assertWildcardQueryEquals("?Term", true, "?term", true); }