public void testBooleanOptionalNoTiebreaker() throws Exception {

    BooleanQuery q = new BooleanQuery();
    {
      DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
      q1.add(tq("hed", "albino"));
      q1.add(tq("dek", "albino"));
      q.add(q1, BooleanClause.Occur.SHOULD); // false,false);
    }
    {
      DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
      q2.add(tq("hed", "elephant"));
      q2.add(tq("dek", "elephant"));
      q.add(q2, BooleanClause.Occur.SHOULD); // false,false);
    }
    QueryUtils.check(q, s);

    ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;

    try {
      assertEquals("4 docs should match " + q.toString(), 4, h.length);
      float score = h[0].score;
      for (int i = 1; i < h.length - 1; i++) {
        /* note: -1 */
        assertEquals("score #" + i + " is not the same", score, h[i].score, SCORE_COMP_THRESH);
      }
      assertEquals("wrong last", "d1", s.doc(h[h.length - 1].doc).get("id"));
      float score1 = h[h.length - 1].score;
      assertTrue(
          "d1 does not have worse score then others: " + score + " >? " + score1, score > score1);
    } catch (Error e) {
      printHits("testBooleanOptionalNoTiebreaker", h, s);
      throw e;
    }
  }
 @Override
 public boolean shouldCache(Query query, LeafReaderContext context) throws IOException {
   if (query instanceof MatchAllDocsQuery
       // MatchNoDocsQuery currently rewrites to a BooleanQuery,
       // but who knows, it might get its own Weight one day
       || query instanceof MatchNoDocsQuery) {
     return false;
   }
   if (query instanceof BooleanQuery) {
     BooleanQuery bq = (BooleanQuery) query;
     if (bq.clauses().isEmpty()) {
       return false;
     }
   }
   if (query instanceof DisjunctionMaxQuery) {
     DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
     if (dmq.getDisjuncts().isEmpty()) {
       return false;
     }
   }
   if (segmentPolicy.shouldCache(query, context) == false) {
     return false;
   }
   final int frequency = frequency(query);
   final int minFrequency = minFrequencyToCache(query);
   return frequency >= minFrequency;
 }
 protected Query dmq(float tieBreakerMultiplier, Query... queries) {
   DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
   for (Query q : queries) {
     query.add(q);
   }
   return query;
 }
 /**
  * Create a shallow copy of us -- used in rewriting if necessary
  *
  * @return a copy of us (but reuse, don't copy, our subqueries)
  */
 @Override
 @SuppressWarnings("unchecked")
 public DisjunctionMaxQuery clone() {
   DisjunctionMaxQuery clone = (DisjunctionMaxQuery) super.clone();
   clone.disjuncts = (ArrayList<Query>) this.disjuncts.clone();
   return clone;
 }
  public void testBooleanRequiredEqualScores() throws Exception {

    BooleanQuery q = new BooleanQuery();
    {
      DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
      q1.add(tq("hed", "albino"));
      q1.add(tq("dek", "albino"));
      q.add(q1, BooleanClause.Occur.MUST); // true,false);
      QueryUtils.check(q1, s);
    }
    {
      DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
      q2.add(tq("hed", "elephant"));
      q2.add(tq("dek", "elephant"));
      q.add(q2, BooleanClause.Occur.MUST); // true,false);
      QueryUtils.check(q2, s);
    }

    QueryUtils.check(q, s);

    ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;

    try {
      assertEquals("3 docs should match " + q.toString(), 3, h.length);
      float score = h[0].score;
      for (int i = 1; i < h.length; i++) {
        assertEquals("score #" + i + " is not the same", score, h[i].score, SCORE_COMP_THRESH);
      }
    } catch (Error e) {
      printHits("testBooleanRequiredEqualScores1", h, s);
      throw e;
    }
  }
 /**
  * Return true iff we represent the same query as o
  *
  * @param o another object
  * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the
  *     same order, as us
  */
 @Override
 public boolean equals(Object o) {
   if (!(o instanceof DisjunctionMaxQuery)) return false;
   DisjunctionMaxQuery other = (DisjunctionMaxQuery) o;
   return this.getBoost() == other.getBoost()
       && this.tieBreakerMultiplier == other.tieBreakerMultiplier
       && this.disjuncts.equals(other.disjuncts);
 }
  public void testDMQ7() throws Exception {
    DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f);

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(new TermQuery(new Term(FIELD, "yy")), BooleanClause.Occur.MUST_NOT);
    booleanQuery.add(new TermQuery(new Term(FIELD, "w3")), BooleanClause.Occur.SHOULD);

    q.add(booleanQuery);
    q.add(new TermQuery(new Term(FIELD, "w2")));
    qtest(q, new int[] {0, 1, 2, 3});
  }
  public void testSkipToFirsttimeHit() throws IOException {
    final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
    dq.add(tq("dek", "albino"));
    dq.add(tq("dek", "DOES_NOT_EXIST"));

    QueryUtils.check(dq, s);

    final Weight dw = dq.weight(s);
    final Scorer ds = dw.scorer(s.getIndexReader(), true, false);
    assertTrue("firsttime skipTo found no match", ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id"));
  }
 public void testToQueryDisMaxQuery() throws Exception {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   Query query =
       queryStringQuery("test")
           .field(STRING_FIELD_NAME, 2.2f)
           .field(STRING_FIELD_NAME_2)
           .useDisMax(true)
           .toQuery(createShardContext());
   assertThat(query, instanceOf(DisjunctionMaxQuery.class));
   DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) query;
   List<Query> disjuncts = disMaxQuery.getDisjuncts();
   assertTermOrBoostQuery(disjuncts.get(0), STRING_FIELD_NAME, "test", 2.2f);
   assertTermOrBoostQuery(disjuncts.get(1), STRING_FIELD_NAME_2, "test", 1.0f);
 }
  public void testBooleanOptionalWithTiebreaker() throws Exception {

    BooleanQuery q = new BooleanQuery();
    {
      DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
      q1.add(tq("hed", "albino"));
      q1.add(tq("dek", "albino"));
      q.add(q1, BooleanClause.Occur.SHOULD); // false,false);
    }
    {
      DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
      q2.add(tq("hed", "elephant"));
      q2.add(tq("dek", "elephant"));
      q.add(q2, BooleanClause.Occur.SHOULD); // false,false);
    }
    QueryUtils.check(q, s);

    ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;

    try {

      assertEquals("4 docs should match " + q.toString(), 4, h.length);

      float score0 = h[0].score;
      float score1 = h[1].score;
      float score2 = h[2].score;
      float score3 = h[3].score;

      String doc0 = s.doc(h[0].doc).get("id");
      String doc1 = s.doc(h[1].doc).get("id");
      String doc2 = s.doc(h[2].doc).get("id");
      String doc3 = s.doc(h[3].doc).get("id");

      assertTrue("doc0 should be d2 or d4: " + doc0, doc0.equals("d2") || doc0.equals("d4"));
      assertTrue("doc1 should be d2 or d4: " + doc0, doc1.equals("d2") || doc1.equals("d4"));
      assertEquals("score0 and score1 should match", score0, score1, SCORE_COMP_THRESH);
      assertEquals("wrong third", "d3", doc2);
      assertTrue(
          "d3 does not have worse score then d2 and d4: " + score1 + " >? " + score2,
          score1 > score2);

      assertEquals("wrong fourth", "d1", doc3);
      assertTrue(
          "d1 does not have worse score then d3: " + score2 + " >? " + score3, score2 > score3);

    } catch (Error e) {
      printHits("testBooleanOptionalWithTiebreaker", h, s);
      throw e;
    }
  }
  public void testSkipToFirsttimeMiss() throws IOException {
    final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
    dq.add(tq("id", "d1"));
    dq.add(tq("dek", "DOES_NOT_EXIST"));

    QueryUtils.check(dq, s);

    final Weight dw = dq.weight(s);
    final Scorer ds = dw.scorer(s.getIndexReader(), true, false);
    final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS;
    if (skipOk) {
      fail("firsttime skipTo found a match? ... " + r.document(ds.docID()).get("id"));
    }
  }
 public void testToQueryPhraseQueryBoostAndSlop() throws IOException {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   QueryStringQueryBuilder queryStringQueryBuilder =
       new QueryStringQueryBuilder("\"test phrase\"~2").field(STRING_FIELD_NAME, 5f);
   Query query = queryStringQueryBuilder.toQuery(createShardContext());
   assertThat(query, instanceOf(DisjunctionMaxQuery.class));
   DisjunctionMaxQuery disjunctionMaxQuery = (DisjunctionMaxQuery) query;
   assertThat(disjunctionMaxQuery.getDisjuncts().size(), equalTo(1));
   assertThat(disjunctionMaxQuery.getDisjuncts().get(0), instanceOf(BoostQuery.class));
   BoostQuery boostQuery = (BoostQuery) disjunctionMaxQuery.getDisjuncts().get(0);
   assertThat(boostQuery.getBoost(), equalTo(5f));
   assertThat(boostQuery.getQuery(), instanceOf(PhraseQuery.class));
   PhraseQuery phraseQuery = (PhraseQuery) boostQuery.getQuery();
   assertThat(phraseQuery.getSlop(), Matchers.equalTo(2));
   assertThat(phraseQuery.getTerms().length, equalTo(2));
 }
 public void testToQueryMultipleFieldsDisMaxQuery() throws Exception {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   Query query =
       queryStringQuery("test")
           .field(STRING_FIELD_NAME)
           .field(STRING_FIELD_NAME_2)
           .useDisMax(true)
           .toQuery(createShardContext());
   assertThat(query, instanceOf(DisjunctionMaxQuery.class));
   DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) query;
   List<Query> disjuncts = disMaxQuery.getDisjuncts();
   assertThat(
       ((TermQuery) disjuncts.get(0)).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "test")));
   assertThat(
       ((TermQuery) disjuncts.get(1)).getTerm(), equalTo(new Term(STRING_FIELD_NAME_2, "test")));
 }
 public void testToQueryPhraseQuery() throws IOException {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   Query query =
       queryStringQuery("\"term1 term2\"")
           .defaultField(STRING_FIELD_NAME)
           .phraseSlop(3)
           .toQuery(createShardContext());
   assertThat(query, instanceOf(DisjunctionMaxQuery.class));
   DisjunctionMaxQuery disjunctionMaxQuery = (DisjunctionMaxQuery) query;
   assertThat(disjunctionMaxQuery.getDisjuncts().size(), equalTo(1));
   assertThat(disjunctionMaxQuery.getDisjuncts().get(0), instanceOf(PhraseQuery.class));
   PhraseQuery phraseQuery = (PhraseQuery) disjunctionMaxQuery.getDisjuncts().get(0);
   assertThat(phraseQuery.getTerms().length, equalTo(2));
   assertThat(phraseQuery.getTerms()[0], equalTo(new Term(STRING_FIELD_NAME, "term1")));
   assertThat(phraseQuery.getTerms()[1], equalTo(new Term(STRING_FIELD_NAME, "term2")));
   assertThat(phraseQuery.getSlop(), equalTo(3));
 }
  public void testDMQ9() throws Exception {
    DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f);

    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(new TermQuery(new Term(FIELD, "yy")), BooleanClause.Occur.SHOULD);

    TermQuery boostedQuery = new TermQuery(new Term(FIELD, "w5"));
    boostedQuery.setBoost(100);
    booleanQuery.add(boostedQuery, BooleanClause.Occur.SHOULD);
    q.add(booleanQuery);

    TermQuery xxBoostedQuery = new TermQuery(new Term(FIELD, "xx"));
    xxBoostedQuery.setBoost(0);
    q.add(xxBoostedQuery);

    qtest(q, new int[] {0, 2, 3});
  }
  public Query getQuery(Element e) throws ParserException {
    float tieBreaker = DOMUtils.getAttribute(e, "tieBreaker", 0.0f);
    DisjunctionMaxQuery dq = new DisjunctionMaxQuery(tieBreaker);
    dq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));

    NodeList nl = e.getChildNodes();
    for (int i = 0; i < nl.getLength(); i++) {
      Node node = nl.item(i);
      if (node instanceof Element) { // all elements are disjuncts.
        Element queryElem = (Element) node;
        Query q = factory.getQuery(queryElem);
        dq.add(q);
      }
    }

    return dq;
  }
  public void testSimpleEqualScores2() throws Exception {

    DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
    q.add(tq("dek", "albino"));
    q.add(tq("dek", "elephant"));
    QueryUtils.check(q, s);

    ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;

    try {
      assertEquals("3 docs should match " + q.toString(), 3, h.length);
      float score = h[0].score;
      for (int i = 1; i < h.length; i++) {
        assertEquals("score #" + i + " is not the same", score, h[i].score, SCORE_COMP_THRESH);
      }
    } catch (Error e) {
      printHits("testSimpleEqualScores2", h, s);
      throw e;
    }
  }
  public void testSimpleTiebreaker() throws Exception {

    DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.01f);
    q.add(tq("dek", "albino"));
    q.add(tq("dek", "elephant"));
    QueryUtils.check(q, s);

    ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;

    try {
      assertEquals("3 docs should match " + q.toString(), 3, h.length);
      assertEquals("wrong first", "d2", s.doc(h[0].doc).get("id"));
      float score0 = h[0].score;
      float score1 = h[1].score;
      float score2 = h[2].score;
      assertTrue(
          "d2 does not have better score then others: " + score0 + " >? " + score1,
          score0 > score1);
      assertEquals("d4 and d1 don't have equal scores", score1, score2, SCORE_COMP_THRESH);
    } catch (Error e) {
      printHits("testSimpleTiebreaker", h, s);
      throw e;
    }
  }
  public void testBooleanOptionalWithTiebreakerAndBoost() throws Exception {

    BooleanQuery q = new BooleanQuery();
    {
      DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
      q1.add(tq("hed", "albino", 1.5f));
      q1.add(tq("dek", "albino"));
      q.add(q1, BooleanClause.Occur.SHOULD); // false,false);
    }
    {
      DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
      q2.add(tq("hed", "elephant", 1.5f));
      q2.add(tq("dek", "elephant"));
      q.add(q2, BooleanClause.Occur.SHOULD); // false,false);
    }
    QueryUtils.check(q, s);

    ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;

    try {

      assertEquals("4 docs should match " + q.toString(), 4, h.length);

      float score0 = h[0].score;
      float score1 = h[1].score;
      float score2 = h[2].score;
      float score3 = h[3].score;

      String doc0 = s.doc(h[0].doc).get("id");
      String doc1 = s.doc(h[1].doc).get("id");
      String doc2 = s.doc(h[2].doc).get("id");
      String doc3 = s.doc(h[3].doc).get("id");

      assertEquals("doc0 should be d4: ", "d4", doc0);
      assertEquals("doc1 should be d3: ", "d3", doc1);
      assertEquals("doc2 should be d2: ", "d2", doc2);
      assertEquals("doc3 should be d1: ", "d1", doc3);

      assertTrue(
          "d4 does not have a better score then d3: " + score0 + " >? " + score1, score0 > score1);
      assertTrue(
          "d3 does not have a better score then d2: " + score1 + " >? " + score2, score1 > score2);
      assertTrue(
          "d3 does not have a better score then d1: " + score2 + " >? " + score3, score2 > score3);

    } catch (Error e) {
      printHits("testBooleanOptionalWithTiebreakerAndBoost", h, s);
      throw e;
    }
  }
 public void testDMQ2() throws Exception {
   DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f);
   q.add(new TermQuery(new Term(FIELD, "w1")));
   q.add(new TermQuery(new Term(FIELD, "w5")));
   qtest(q, new int[] {0, 1, 2, 3});
 }
 public void testDMQ4() throws Exception {
   DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f);
   q.add(new TermQuery(new Term(FIELD, "QQ")));
   q.add(new TermQuery(new Term(FIELD, "xx")));
   qtest(q, new int[] {2, 3});
 }
Exemple #22
0
  @Test
  public void testGetThatFieldProbabilityRatioIsReflectedInBoost() throws Exception {

    ArgumentCaptor<Float> normalizeCaptor = ArgumentCaptor.forClass(Float.class);

    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Directory directory = newDirectory();

    Analyzer analyzer =
        new Analyzer() {
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new WhitespaceTokenizer();
            TokenStream filter =
                new WordDelimiterFilter(
                    source,
                    WordDelimiterFilter.GENERATE_WORD_PARTS
                        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE,
                    null);
            filter = new LowerCaseFilter(filter);
            return new TokenStreamComponents(source, filter);
          }
        };

    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setCodec(Codec.forName(TestUtil.LUCENE_CODEC));
    IndexWriter indexWriter = new IndexWriter(directory, conf);

    // Both fields f1 and f2 have 10 terms in total.
    // f1: the search terms (abc def) make 100% of all terms in f1
    // f2: the search terms (abc def) make 50% of all terms in f2
    // --> we expect that the sum of the boost factors for terms in bq(+f1:abc, +f1:def)
    // equals 2 * sum of the boost factors for terms in bq(+f2:abc, +f2:def)

    PRMSFieldBoostTest.addNumDocs("f1", "abc def", indexWriter, 2);
    PRMSFieldBoostTest.addNumDocs("f1", "abc", indexWriter, 4);
    PRMSFieldBoostTest.addNumDocs("f1", "def", indexWriter, 2);
    PRMSFieldBoostTest.addNumDocs("f2", "abc def", indexWriter, 1);
    PRMSFieldBoostTest.addNumDocs("f2", "abc", indexWriter, 2);
    PRMSFieldBoostTest.addNumDocs("f2", "def", indexWriter, 1);
    PRMSFieldBoostTest.addNumDocs("f2", "ghi", indexWriter, 5);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(similarity);

    Map<String, Float> fields = new HashMap<>();
    fields.put("f1", 1f);
    fields.put("f2", 1f);
    SearchFieldsAndBoosting searchFieldsAndBoosting =
        new SearchFieldsAndBoosting(FieldBoostModel.PRMS, fields, fields, 0.8f);

    LuceneQueryBuilder queryBuilder =
        new LuceneQueryBuilder(dfc, analyzer, searchFieldsAndBoosting, 0.01f, null);

    WhiteSpaceQuerqyParser parser = new WhiteSpaceQuerqyParser();

    Query query = queryBuilder.createQuery(parser.parse("AbcDef"));
    dfc.finishedUserQuery();

    assertTrue(query instanceof DisjunctionMaxQuery);

    DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
    List<Query> disjuncts = dmq.getDisjuncts();
    assertEquals(2, disjuncts.size());

    Query disjunct1 = disjuncts.get(0);
    if (disjunct1 instanceof BoostQuery) {
      disjunct1 = ((BoostQuery) disjunct1).getQuery();
    }
    assertTrue(disjunct1 instanceof BooleanQuery);

    BooleanQuery bq1 = (BooleanQuery) disjunct1;

    Query disjunct2 = disjuncts.get(1);
    if (disjunct2 instanceof BoostQuery) {
      disjunct2 = ((BoostQuery) disjunct2).getQuery();
    }
    assertTrue(disjunct2 instanceof BooleanQuery);

    BooleanQuery bq2 = (BooleanQuery) disjunct2;

    final Weight weight1 = bq1.createWeight(indexSearcher, true);
    weight1.normalize(0.1f, 4f);

    final Weight weight2 = bq2.createWeight(indexSearcher, true);
    weight2.normalize(0.1f, 4f);

    Mockito.verify(simWeight, times(4)).normalize(eq(0.1f), normalizeCaptor.capture());
    final List<Float> capturedBoosts = normalizeCaptor.getAllValues();

    // capturedBoosts = boosts of [bq1.term1, bq1.term2, bq2.term1, bq2.term2 ]
    assertEquals(capturedBoosts.get(0), capturedBoosts.get(1), 0.00001);
    assertEquals(capturedBoosts.get(2), capturedBoosts.get(3), 0.00001);
    assertEquals(2f, capturedBoosts.get(0) / capturedBoosts.get(3), 0.00001);

    indexReader.close();
    directory.close();
    analyzer.close();
  }