public void testBooleanOptionalNoTiebreaker() throws Exception { BooleanQuery q = new BooleanQuery(); { DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f); q1.add(tq("hed", "albino")); q1.add(tq("dek", "albino")); q.add(q1, BooleanClause.Occur.SHOULD); // false,false); } { DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f); q2.add(tq("hed", "elephant")); q2.add(tq("dek", "elephant")); q.add(q2, BooleanClause.Occur.SHOULD); // false,false); } QueryUtils.check(q, s); ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; try { assertEquals("4 docs should match " + q.toString(), 4, h.length); float score = h[0].score; for (int i = 1; i < h.length - 1; i++) { /* note: -1 */ assertEquals("score #" + i + " is not the same", score, h[i].score, SCORE_COMP_THRESH); } assertEquals("wrong last", "d1", s.doc(h[h.length - 1].doc).get("id")); float score1 = h[h.length - 1].score; assertTrue( "d1 does not have worse score then others: " + score + " >? " + score1, score > score1); } catch (Error e) { printHits("testBooleanOptionalNoTiebreaker", h, s); throw e; } }
@Override public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { if (query instanceof MatchAllDocsQuery // MatchNoDocsQuery currently rewrites to a BooleanQuery, // but who knows, it might get its own Weight one day || query instanceof MatchNoDocsQuery) { return false; } if (query instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) query; if (bq.clauses().isEmpty()) { return false; } } if (query instanceof DisjunctionMaxQuery) { DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; if (dmq.getDisjuncts().isEmpty()) { return false; } } if (segmentPolicy.shouldCache(query, context) == false) { return false; } final int frequency = frequency(query); final int minFrequency = minFrequencyToCache(query); return frequency >= minFrequency; }
/**
 * Builds a {@link DisjunctionMaxQuery} with the given tie breaker multiplier and adds each of
 * the supplied queries to it, in argument order.
 */
protected Query dmq(float tieBreakerMultiplier, Query... queries) {
  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(tieBreakerMultiplier);
  for (int i = 0; i < queries.length; i++) {
    disMax.add(queries[i]);
  }
  return disMax;
}
/**
 * Create a shallow copy of us -- used in rewriting if necessary.
 *
 * <p>The copy gets its own disjunct list, so adding to or removing from one query does not
 * affect the other, but the list elements (the subqueries) are shared, not copied.
 *
 * @return a copy of us (but reuse, don't copy, our subqueries)
 */
@Override
public DisjunctionMaxQuery clone() {
  DisjunctionMaxQuery clone = (DisjunctionMaxQuery) super.clone();
  // The copy constructor yields the same shallow list copy as ArrayList.clone(), without the
  // unchecked cast that clone()'s Object return type forces.
  clone.disjuncts = new ArrayList<Query>(this.disjuncts);
  return clone;
}
public void testBooleanRequiredEqualScores() throws Exception { BooleanQuery q = new BooleanQuery(); { DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f); q1.add(tq("hed", "albino")); q1.add(tq("dek", "albino")); q.add(q1, BooleanClause.Occur.MUST); // true,false); QueryUtils.check(q1, s); } { DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f); q2.add(tq("hed", "elephant")); q2.add(tq("dek", "elephant")); q.add(q2, BooleanClause.Occur.MUST); // true,false); QueryUtils.check(q2, s); } QueryUtils.check(q, s); ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; try { assertEquals("3 docs should match " + q.toString(), 3, h.length); float score = h[0].score; for (int i = 1; i < h.length; i++) { assertEquals("score #" + i + " is not the same", score, h[i].score, SCORE_COMP_THRESH); } } catch (Error e) { printHits("testBooleanRequiredEqualScores1", h, s); throw e; } }
/**
 * Return true iff we represent the same query as o.
 *
 * <p>Boost and tie breaker multiplier are compared with {@code ==}. The identity check up
 * front keeps the relation reflexive even when one of those floats is NaN.
 *
 * @param o another object
 * @return true iff o is a DisjunctionMaxQuery with the same boost, the same tie breaker
 *     multiplier and the same subqueries, in the same order, as us
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof DisjunctionMaxQuery)) {
    return false;
  }
  DisjunctionMaxQuery other = (DisjunctionMaxQuery) o;
  return this.getBoost() == other.getBoost()
      && this.tieBreakerMultiplier == other.tieBreakerMultiplier
      && this.disjuncts.equals(other.disjuncts);
}
/**
 * Dismax (tie breaker 0.5) of a boolean query ("w3" optional, "yy" prohibited) and the term
 * "w2"; expects docs 0, 1, 2 and 3.
 */
public void testDMQ7() throws Exception {
  final BooleanQuery w3WithoutYy = new BooleanQuery();
  w3WithoutYy.add(new TermQuery(new Term(FIELD, "yy")), BooleanClause.Occur.MUST_NOT);
  w3WithoutYy.add(new TermQuery(new Term(FIELD, "w3")), BooleanClause.Occur.SHOULD);

  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.5f);
  disMax.add(w3WithoutYy);
  disMax.add(new TermQuery(new Term(FIELD, "w2")));

  final int[] expectedDocs = {0, 1, 2, 3};
  qtest(disMax, expectedDocs);
}
/**
 * Advances a fresh dismax scorer ("dek:albino" or a non-existent term) straight to target 3
 * and expects it to land on the document whose id is "d4".
 */
public void testSkipToFirsttimeHit() throws IOException {
  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.0f);
  disMax.add(tq("dek", "albino"));
  disMax.add(tq("dek", "DOES_NOT_EXIST"));

  QueryUtils.check(disMax, s);

  final Weight weight = disMax.weight(s);
  final Scorer scorer = weight.scorer(s.getIndexReader(), true, false);

  final int landedOn = scorer.advance(3);
  assertTrue("firsttime skipTo found no match", landedOn != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals("found wrong docid", "d4", r.document(scorer.docID()).get("id"));
}
/**
 * A dismax query string over two fields, the first boosted by 2.2, must produce a
 * {@link DisjunctionMaxQuery} whose two disjuncts carry the expected field, term and boost.
 */
public void testToQueryDisMaxQuery() throws Exception {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  final Query parsed =
      queryStringQuery("test")
          .field(STRING_FIELD_NAME, 2.2f)
          .field(STRING_FIELD_NAME_2)
          .useDisMax(true)
          .toQuery(createShardContext());

  assertThat(parsed, instanceOf(DisjunctionMaxQuery.class));
  final List<Query> clauses = ((DisjunctionMaxQuery) parsed).getDisjuncts();

  assertTermOrBoostQuery(clauses.get(0), STRING_FIELD_NAME, "test", 2.2f);
  assertTermOrBoostQuery(clauses.get(1), STRING_FIELD_NAME_2, "test", 1.0f);
}
public void testBooleanOptionalWithTiebreaker() throws Exception { BooleanQuery q = new BooleanQuery(); { DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f); q1.add(tq("hed", "albino")); q1.add(tq("dek", "albino")); q.add(q1, BooleanClause.Occur.SHOULD); // false,false); } { DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f); q2.add(tq("hed", "elephant")); q2.add(tq("dek", "elephant")); q.add(q2, BooleanClause.Occur.SHOULD); // false,false); } QueryUtils.check(q, s); ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; try { assertEquals("4 docs should match " + q.toString(), 4, h.length); float score0 = h[0].score; float score1 = h[1].score; float score2 = h[2].score; float score3 = h[3].score; String doc0 = s.doc(h[0].doc).get("id"); String doc1 = s.doc(h[1].doc).get("id"); String doc2 = s.doc(h[2].doc).get("id"); String doc3 = s.doc(h[3].doc).get("id"); assertTrue("doc0 should be d2 or d4: " + doc0, doc0.equals("d2") || doc0.equals("d4")); assertTrue("doc1 should be d2 or d4: " + doc0, doc1.equals("d2") || doc1.equals("d4")); assertEquals("score0 and score1 should match", score0, score1, SCORE_COMP_THRESH); assertEquals("wrong third", "d3", doc2); assertTrue( "d3 does not have worse score then d2 and d4: " + score1 + " >? " + score2, score1 > score2); assertEquals("wrong fourth", "d1", doc3); assertTrue( "d1 does not have worse score then d3: " + score2 + " >? " + score3, score2 > score3); } catch (Error e) { printHits("testBooleanOptionalWithTiebreaker", h, s); throw e; } }
/**
 * Advances a fresh dismax scorer ("id:d1" or a non-existent term) straight to target 3 and
 * expects the scorer to be exhausted, i.e. no match at or beyond that target.
 */
public void testSkipToFirsttimeMiss() throws IOException {
  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.0f);
  disMax.add(tq("id", "d1"));
  disMax.add(tq("dek", "DOES_NOT_EXIST"));

  QueryUtils.check(disMax, s);

  final Weight weight = disMax.weight(s);
  final Scorer scorer = weight.scorer(s.getIndexReader(), true, false);

  if (scorer.advance(3) != DocIdSetIterator.NO_MORE_DOCS) {
    fail("firsttime skipTo found a match? ... " + r.document(scorer.docID()).get("id"));
  }
}
/**
 * The query string {@code "test phrase"~2} on a field boosted by 5 must produce a dismax query
 * with a single disjunct: a {@link BoostQuery} (boost 5) wrapping a two-term
 * {@link PhraseQuery} with slop 2.
 */
public void testToQueryPhraseQueryBoostAndSlop() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  final QueryStringQueryBuilder builder =
      new QueryStringQueryBuilder("\"test phrase\"~2").field(STRING_FIELD_NAME, 5f);
  final Query parsed = builder.toQuery(createShardContext());

  assertThat(parsed, instanceOf(DisjunctionMaxQuery.class));
  final DisjunctionMaxQuery disMax = (DisjunctionMaxQuery) parsed;
  assertThat(disMax.getDisjuncts().size(), equalTo(1));

  final Query onlyDisjunct = disMax.getDisjuncts().get(0);
  assertThat(onlyDisjunct, instanceOf(BoostQuery.class));
  final BoostQuery boosted = (BoostQuery) onlyDisjunct;
  assertThat(boosted.getBoost(), equalTo(5f));

  final Query wrapped = boosted.getQuery();
  assertThat(wrapped, instanceOf(PhraseQuery.class));
  final PhraseQuery phrase = (PhraseQuery) wrapped;
  assertThat(phrase.getSlop(), Matchers.equalTo(2));
  assertThat(phrase.getTerms().length, equalTo(2));
}
/**
 * A dismax query string over two unboosted fields must produce a {@link DisjunctionMaxQuery}
 * whose first two disjuncts are term queries for "test" on the respective fields.
 */
public void testToQueryMultipleFieldsDisMaxQuery() throws Exception {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  final Query parsed =
      queryStringQuery("test")
          .field(STRING_FIELD_NAME)
          .field(STRING_FIELD_NAME_2)
          .useDisMax(true)
          .toQuery(createShardContext());

  assertThat(parsed, instanceOf(DisjunctionMaxQuery.class));
  final List<Query> clauses = ((DisjunctionMaxQuery) parsed).getDisjuncts();

  final TermQuery first = (TermQuery) clauses.get(0);
  assertThat(first.getTerm(), equalTo(new Term(STRING_FIELD_NAME, "test")));

  final TermQuery second = (TermQuery) clauses.get(1);
  assertThat(second.getTerm(), equalTo(new Term(STRING_FIELD_NAME_2, "test")));
}
/**
 * The quoted query string {@code "term1 term2"} with phrase slop 3 on the default field must
 * produce a dismax query with a single {@link PhraseQuery} disjunct holding both terms, in
 * order, with slop 3.
 */
public void testToQueryPhraseQuery() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  final Query parsed =
      queryStringQuery("\"term1 term2\"")
          .defaultField(STRING_FIELD_NAME)
          .phraseSlop(3)
          .toQuery(createShardContext());

  assertThat(parsed, instanceOf(DisjunctionMaxQuery.class));
  final DisjunctionMaxQuery disMax = (DisjunctionMaxQuery) parsed;
  assertThat(disMax.getDisjuncts().size(), equalTo(1));

  final Query onlyDisjunct = disMax.getDisjuncts().get(0);
  assertThat(onlyDisjunct, instanceOf(PhraseQuery.class));
  final PhraseQuery phrase = (PhraseQuery) onlyDisjunct;

  final Term[] terms = phrase.getTerms();
  assertThat(terms.length, equalTo(2));
  assertThat(terms[0], equalTo(new Term(STRING_FIELD_NAME, "term1")));
  assertThat(terms[1], equalTo(new Term(STRING_FIELD_NAME, "term2")));
  assertThat(phrase.getSlop(), equalTo(3));
}
/**
 * Dismax (tie breaker 0.5) of a boolean query ("yy" optional, "w5" optional with boost 100)
 * and the term "xx" with boost 0; expects docs 0, 2 and 3.
 */
public void testDMQ9() throws Exception {
  final TermQuery w5Boosted = new TermQuery(new Term(FIELD, "w5"));
  w5Boosted.setBoost(100);

  final BooleanQuery yyOrW5 = new BooleanQuery();
  yyOrW5.add(new TermQuery(new Term(FIELD, "yy")), BooleanClause.Occur.SHOULD);
  yyOrW5.add(w5Boosted, BooleanClause.Occur.SHOULD);

  final TermQuery xxZeroBoost = new TermQuery(new Term(FIELD, "xx"));
  xxZeroBoost.setBoost(0);

  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.5f);
  disMax.add(yyOrW5);
  disMax.add(xxZeroBoost);

  final int[] expectedDocs = {0, 2, 3};
  qtest(disMax, expectedDocs);
}
public Query getQuery(Element e) throws ParserException { float tieBreaker = DOMUtils.getAttribute(e, "tieBreaker", 0.0f); DisjunctionMaxQuery dq = new DisjunctionMaxQuery(tieBreaker); dq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f)); NodeList nl = e.getChildNodes(); for (int i = 0; i < nl.getLength(); i++) { Node node = nl.item(i); if (node instanceof Element) { // all elements are disjuncts. Element queryElem = (Element) node; Query q = factory.getQuery(queryElem); dq.add(q); } } return dq; }
/**
 * Dismax (tie breaker 0.0) of "dek:albino" and "dek:elephant"; expects three hits that all
 * share the same score (within {@code SCORE_COMP_THRESH}).
 */
public void testSimpleEqualScores2() throws Exception {
  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.0f);
  disMax.add(tq("dek", "albino"));
  disMax.add(tq("dek", "elephant"));
  QueryUtils.check(disMax, s);

  final ScoreDoc[] hits = s.search(disMax, null, 1000).scoreDocs;

  try {
    assertEquals("3 docs should match " + disMax.toString(), 3, hits.length);

    final float topScore = hits[0].score;
    for (int rank = 1; rank < hits.length; rank++) {
      assertEquals(
          "score #" + rank + " is not the same", topScore, hits[rank].score, SCORE_COMP_THRESH);
    }
  } catch (Error failure) {
    // Dump the hits for diagnosis, then let the failure propagate.
    printHits("testSimpleEqualScores2", hits, s);
    throw failure;
  }
}
/**
 * Dismax (tie breaker 0.01) of "dek:albino" and "dek:elephant"; expects three hits with d2
 * first on a strictly higher score, and the remaining two hits scoring equally (within
 * {@code SCORE_COMP_THRESH}).
 */
public void testSimpleTiebreaker() throws Exception {
  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.01f);
  disMax.add(tq("dek", "albino"));
  disMax.add(tq("dek", "elephant"));
  QueryUtils.check(disMax, s);

  final ScoreDoc[] hits = s.search(disMax, null, 1000).scoreDocs;

  try {
    assertEquals("3 docs should match " + disMax.toString(), 3, hits.length);
    assertEquals("wrong first", "d2", s.doc(hits[0].doc).get("id"));

    final float first = hits[0].score;
    final float second = hits[1].score;
    final float third = hits[2].score;

    assertTrue(
        "d2 does not have better score then others: " + first + " >? " + second,
        first > second);
    assertEquals("d4 and d1 don't have equal scores", second, third, SCORE_COMP_THRESH);
  } catch (Error failure) {
    // Dump the hits for diagnosis, then let the failure propagate.
    printHits("testSimpleTiebreaker", hits, s);
    throw failure;
  }
}
public void testBooleanOptionalWithTiebreakerAndBoost() throws Exception { BooleanQuery q = new BooleanQuery(); { DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f); q1.add(tq("hed", "albino", 1.5f)); q1.add(tq("dek", "albino")); q.add(q1, BooleanClause.Occur.SHOULD); // false,false); } { DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f); q2.add(tq("hed", "elephant", 1.5f)); q2.add(tq("dek", "elephant")); q.add(q2, BooleanClause.Occur.SHOULD); // false,false); } QueryUtils.check(q, s); ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; try { assertEquals("4 docs should match " + q.toString(), 4, h.length); float score0 = h[0].score; float score1 = h[1].score; float score2 = h[2].score; float score3 = h[3].score; String doc0 = s.doc(h[0].doc).get("id"); String doc1 = s.doc(h[1].doc).get("id"); String doc2 = s.doc(h[2].doc).get("id"); String doc3 = s.doc(h[3].doc).get("id"); assertEquals("doc0 should be d4: ", "d4", doc0); assertEquals("doc1 should be d3: ", "d3", doc1); assertEquals("doc2 should be d2: ", "d2", doc2); assertEquals("doc3 should be d1: ", "d1", doc3); assertTrue( "d4 does not have a better score then d3: " + score0 + " >? " + score1, score0 > score1); assertTrue( "d3 does not have a better score then d2: " + score1 + " >? " + score2, score1 > score2); assertTrue( "d3 does not have a better score then d1: " + score2 + " >? " + score3, score2 > score3); } catch (Error e) { printHits("testBooleanOptionalWithTiebreakerAndBoost", h, s); throw e; } }
/** Dismax (tie breaker 0.5) of the terms "w1" and "w5"; expects docs 0, 1, 2 and 3. */
public void testDMQ2() throws Exception {
  final TermQuery w1 = new TermQuery(new Term(FIELD, "w1"));
  final TermQuery w5 = new TermQuery(new Term(FIELD, "w5"));

  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.5f);
  disMax.add(w1);
  disMax.add(w5);

  final int[] expectedDocs = {0, 1, 2, 3};
  qtest(disMax, expectedDocs);
}
/** Dismax (tie breaker 0.5) of the terms "QQ" and "xx"; expects docs 2 and 3. */
public void testDMQ4() throws Exception {
  final TermQuery qq = new TermQuery(new Term(FIELD, "QQ"));
  final TermQuery xx = new TermQuery(new Term(FIELD, "xx"));

  final DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.5f);
  disMax.add(qq);
  disMax.add(xx);

  final int[] expectedDocs = {2, 3};
  qtest(disMax, expectedDocs);
}
@Test public void testGetThatFieldProbabilityRatioIsReflectedInBoost() throws Exception { ArgumentCaptor<Float> normalizeCaptor = ArgumentCaptor.forClass(Float.class); DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection(); Directory directory = newDirectory(); Analyzer analyzer = new Analyzer() { protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new WhitespaceTokenizer(); TokenStream filter = new WordDelimiterFilter( source, WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE, null); filter = new LowerCaseFilter(filter); return new TokenStreamComponents(source, filter); } }; IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setCodec(Codec.forName(TestUtil.LUCENE_CODEC)); IndexWriter indexWriter = new IndexWriter(directory, conf); // Both fields f1 and f2 have 10 terms in total. // f1: the search terms (abc def) make 100% of all terms in f1 // f2: the search terms (abc def) make 50% of all terms in f2 // --> we expect that the sum of the boost factors for terms in bq(+f1:abc, +f1:def) // equals 2 * sum of the boost factors for terms in bq(+f2:abc, +f2:def) PRMSFieldBoostTest.addNumDocs("f1", "abc def", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f1", "abc", indexWriter, 4); PRMSFieldBoostTest.addNumDocs("f1", "def", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f2", "abc def", indexWriter, 1); PRMSFieldBoostTest.addNumDocs("f2", "abc", indexWriter, 2); PRMSFieldBoostTest.addNumDocs("f2", "def", indexWriter, 1); PRMSFieldBoostTest.addNumDocs("f2", "ghi", indexWriter, 5); indexWriter.close(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); indexSearcher.setSimilarity(similarity); Map<String, Float> fields = new HashMap<>(); fields.put("f1", 1f); fields.put("f2", 1f); SearchFieldsAndBoosting searchFieldsAndBoosting = new SearchFieldsAndBoosting(FieldBoostModel.PRMS, fields, fields, 0.8f); 
LuceneQueryBuilder queryBuilder = new LuceneQueryBuilder(dfc, analyzer, searchFieldsAndBoosting, 0.01f, null); WhiteSpaceQuerqyParser parser = new WhiteSpaceQuerqyParser(); Query query = queryBuilder.createQuery(parser.parse("AbcDef")); dfc.finishedUserQuery(); assertTrue(query instanceof DisjunctionMaxQuery); DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; List<Query> disjuncts = dmq.getDisjuncts(); assertEquals(2, disjuncts.size()); Query disjunct1 = disjuncts.get(0); if (disjunct1 instanceof BoostQuery) { disjunct1 = ((BoostQuery) disjunct1).getQuery(); } assertTrue(disjunct1 instanceof BooleanQuery); BooleanQuery bq1 = (BooleanQuery) disjunct1; Query disjunct2 = disjuncts.get(1); if (disjunct2 instanceof BoostQuery) { disjunct2 = ((BoostQuery) disjunct2).getQuery(); } assertTrue(disjunct2 instanceof BooleanQuery); BooleanQuery bq2 = (BooleanQuery) disjunct2; final Weight weight1 = bq1.createWeight(indexSearcher, true); weight1.normalize(0.1f, 4f); final Weight weight2 = bq2.createWeight(indexSearcher, true); weight2.normalize(0.1f, 4f); Mockito.verify(simWeight, times(4)).normalize(eq(0.1f), normalizeCaptor.capture()); final List<Float> capturedBoosts = normalizeCaptor.getAllValues(); // capturedBoosts = boosts of [bq1.term1, bq1.term2, bq2.term1, bq2.term2 ] assertEquals(capturedBoosts.get(0), capturedBoosts.get(1), 0.00001); assertEquals(capturedBoosts.get(2), capturedBoosts.get(3), 0.00001); assertEquals(2f, capturedBoosts.get(0) / capturedBoosts.get(3), 0.00001); indexReader.close(); directory.close(); analyzer.close(); }