private MoreLikeThis getMoreLikeThis() {
   MoreLikeThis mlt = new MoreLikeThis(reader); // Pass the reader reader
   mlt.setMaxDocFreqPct(maxPercentage);
   mlt.setMaxQueryTerms(maxQueryTerms);
   mlt.setMinDocFreq(minDocFreq);
   mlt.setMinTermFreq(minTermFreq);
   mlt.setAnalyzer(analyzer);
   mlt.setFieldNames(new String[] {"text"}); // specify the fields for similiarity
   return mlt;
 }
Example #2
0
  public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;

    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
      addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();

    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});

    // perform MLT query
    String likeText = "";
    for (String text : generateStrSeq(0, numDocs)) {
      likeText += text + " ";
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));

    // check best terms are topN of highest idf
    Collection<BooleanClause> clauses = query.clauses();
    assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());

    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
      expectedTerms[idx++] = new Term("text", text);
    }
    for (BooleanClause clause : clauses) {
      Term term = ((TermQuery) clause.getQuery()).getTerm();
      assertTrue(Arrays.asList(expectedTerms).contains(term));
    }

    // clean up
    reader.close();
    dir.close();
    analyzer.close();
  }
Example #3
0
  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161")
  public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception {
    IndexReader reader = null;
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    try {
      int maxQueryTerms = 25;

      String[] itShopItemForSale =
          new String[] {
            "watch",
            "ipod",
            "asrock",
            "imac",
            "macbookpro",
            "monitor",
            "keyboard",
            "mouse",
            "speakers"
          };
      String[] itShopItemNotForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"};

      String[] clothesShopItemForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"};
      String[] clothesShopItemNotForSale =
          new String[] {
            "watch",
            "ipod",
            "asrock",
            "imac",
            "macbookpro",
            "monitor",
            "keyboard",
            "mouse",
            "speakers"
          };

      // add series of shop docs
      RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
      for (int i = 0; i < 300; i++) {
        addShopDoc(writer, "it", itShopItemForSale, itShopItemNotForSale);
      }
      for (int i = 0; i < 300; i++) {
        addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
      }
      // Input Document is a clothes shop
      int inputDocId =
          addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
      reader = writer.getReader();
      writer.close();

      // setup MLT query
      MoreLikeThis mlt = new MoreLikeThis(reader);

      mlt.setAnalyzer(analyzer);
      mlt.setMaxQueryTerms(maxQueryTerms);
      mlt.setMinDocFreq(1);
      mlt.setMinTermFreq(1);
      mlt.setMinWordLen(1);
      mlt.setFieldNames(new String[] {FOR_SALE, NOT_FOR_SALE});

      // perform MLT query
      BooleanQuery query = (BooleanQuery) mlt.like(inputDocId);
      Collection<BooleanClause> clauses = query.clauses();

      Collection<BooleanClause> expectedClothesShopClauses = new ArrayList<BooleanClause>();
      for (String itemForSale : clothesShopItemForSale) {
        BooleanClause booleanClause =
            new BooleanClause(
                new TermQuery(new Term(FOR_SALE, itemForSale)), BooleanClause.Occur.SHOULD);
        expectedClothesShopClauses.add(booleanClause);
      }
      for (String itemNotForSale : clothesShopItemNotForSale) {
        BooleanClause booleanClause =
            new BooleanClause(
                new TermQuery(new Term(NOT_FOR_SALE, itemNotForSale)), BooleanClause.Occur.SHOULD);
        expectedClothesShopClauses.add(booleanClause);
      }

      for (BooleanClause expectedClause : expectedClothesShopClauses) {
        assertTrue(clauses.contains(expectedClause));
      }
    } finally {
      // clean up
      if (reader != null) {
        reader.close();
      }
      dir.close();
      analyzer.close();
    }
  }