Пример #1
0
  // LUCENE-5725
  public void testMultiValues() throws Exception {
    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});

    BooleanQuery query =
        (BooleanQuery)
            mlt.like(
                "text",
                new StringReader("lucene"),
                new StringReader("lucene release"),
                new StringReader("apache"),
                new StringReader("apache lucene"));
    Collection<BooleanClause> clauses = query.clauses();
    assertEquals("Expected 2 clauses only!", 2, clauses.size());
    for (BooleanClause clause : clauses) {
      Term term = ((TermQuery) clause.getQuery()).getTerm();
      assertTrue(
          Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
    }
    analyzer.close();
  }
 private MoreLikeThis getMoreLikeThis() {
   MoreLikeThis mlt = new MoreLikeThis(reader); // Pass the reader reader
   mlt.setMaxDocFreqPct(maxPercentage);
   mlt.setMaxQueryTerms(maxQueryTerms);
   mlt.setMinDocFreq(minDocFreq);
   mlt.setMinTermFreq(minTermFreq);
   mlt.setAnalyzer(analyzer);
   mlt.setFieldNames(new String[] {"text"}); // specify the fields for similiarity
   return mlt;
 }
Пример #3
0
 // LUCENE-3326
 public void testMultiFields() throws Exception {
   MoreLikeThis mlt = new MoreLikeThis(reader);
   Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
   mlt.setAnalyzer(analyzer);
   mlt.setMinDocFreq(1);
   mlt.setMinTermFreq(1);
   mlt.setMinWordLen(1);
   mlt.setFieldNames(new String[] {"text", "foobar"});
   mlt.like("foobar", new StringReader("this is a test"));
   analyzer.close();
 }
Пример #4
0
  public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;

    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
      addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();

    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});

    // perform MLT query
    String likeText = "";
    for (String text : generateStrSeq(0, numDocs)) {
      likeText += text + " ";
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));

    // check best terms are topN of highest idf
    Collection<BooleanClause> clauses = query.clauses();
    assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());

    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
      expectedTerms[idx++] = new Term("text", text);
    }
    for (BooleanClause clause : clauses) {
      Term term = ((TermQuery) clause.getQuery()).getTerm();
      assertTrue(Arrays.asList(expectedTerms).contains(term));
    }

    // clean up
    reader.close();
    dir.close();
    analyzer.close();
  }
Пример #5
0
  public void testBoostFactor() throws Throwable {
    Map<String, Float> originalValues = getOriginalValues();

    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});
    mlt.setBoost(true);

    // this mean that every term boost factor will be multiplied by this
    // number
    float boostFactor = 5;
    mlt.setBoostFactor(boostFactor);

    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
    Collection<BooleanClause> clauses = query.clauses();

    assertEquals(
        "Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size());

    for (BooleanClause clause : clauses) {
      BoostQuery bq = (BoostQuery) clause.getQuery();
      TermQuery tq = (TermQuery) bq.getQuery();
      Float termBoost = originalValues.get(tq.getTerm().text());
      assertNotNull("Expected term " + tq.getTerm().text(), termBoost);

      float totalBoost = termBoost * boostFactor;
      assertEquals(
          "Expected boost of "
              + totalBoost
              + " for term '"
              + tq.getTerm().text()
              + "' got "
              + bq.getBoost(),
          totalBoost,
          bq.getBoost(),
          0.0001);
    }
    analyzer.close();
  }
Пример #6
0
  private Map<String, Float> getOriginalValues() throws IOException {
    Map<String, Float> originalValues = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});
    mlt.setBoost(true);
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
    Collection<BooleanClause> clauses = query.clauses();

    for (BooleanClause clause : clauses) {
      BoostQuery bq = (BoostQuery) clause.getQuery();
      TermQuery tq = (TermQuery) bq.getQuery();
      originalValues.put(tq.getTerm().text(), bq.getBoost());
    }
    analyzer.close();
    return originalValues;
  }
Пример #7
0
  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161")
  public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception {
    IndexReader reader = null;
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    try {
      int maxQueryTerms = 25;

      String[] itShopItemForSale =
          new String[] {
            "watch",
            "ipod",
            "asrock",
            "imac",
            "macbookpro",
            "monitor",
            "keyboard",
            "mouse",
            "speakers"
          };
      String[] itShopItemNotForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"};

      String[] clothesShopItemForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"};
      String[] clothesShopItemNotForSale =
          new String[] {
            "watch",
            "ipod",
            "asrock",
            "imac",
            "macbookpro",
            "monitor",
            "keyboard",
            "mouse",
            "speakers"
          };

      // add series of shop docs
      RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
      for (int i = 0; i < 300; i++) {
        addShopDoc(writer, "it", itShopItemForSale, itShopItemNotForSale);
      }
      for (int i = 0; i < 300; i++) {
        addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
      }
      // Input Document is a clothes shop
      int inputDocId =
          addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
      reader = writer.getReader();
      writer.close();

      // setup MLT query
      MoreLikeThis mlt = new MoreLikeThis(reader);

      mlt.setAnalyzer(analyzer);
      mlt.setMaxQueryTerms(maxQueryTerms);
      mlt.setMinDocFreq(1);
      mlt.setMinTermFreq(1);
      mlt.setMinWordLen(1);
      mlt.setFieldNames(new String[] {FOR_SALE, NOT_FOR_SALE});

      // perform MLT query
      BooleanQuery query = (BooleanQuery) mlt.like(inputDocId);
      Collection<BooleanClause> clauses = query.clauses();

      Collection<BooleanClause> expectedClothesShopClauses = new ArrayList<BooleanClause>();
      for (String itemForSale : clothesShopItemForSale) {
        BooleanClause booleanClause =
            new BooleanClause(
                new TermQuery(new Term(FOR_SALE, itemForSale)), BooleanClause.Occur.SHOULD);
        expectedClothesShopClauses.add(booleanClause);
      }
      for (String itemNotForSale : clothesShopItemNotForSale) {
        BooleanClause booleanClause =
            new BooleanClause(
                new TermQuery(new Term(NOT_FOR_SALE, itemNotForSale)), BooleanClause.Occur.SHOULD);
        expectedClothesShopClauses.add(booleanClause);
      }

      for (BooleanClause expectedClause : expectedClothesShopClauses) {
        assertTrue(clauses.contains(expectedClause));
      }
    } finally {
      // clean up
      if (reader != null) {
        reader.close();
      }
      dir.close();
      analyzer.close();
    }
  }