private MoreLikeThis getMoreLikeThis() { MoreLikeThis mlt = new MoreLikeThis(reader); // Pass the reader reader mlt.setMaxDocFreqPct(maxPercentage); mlt.setMaxQueryTerms(maxQueryTerms); mlt.setMinDocFreq(minDocFreq); mlt.setMinTermFreq(minTermFreq); mlt.setAnalyzer(analyzer); mlt.setFieldNames(new String[] {"text"}); // specify the fields for similiarity return mlt; }
public void testTopN() throws Exception { int numDocs = 100; int topN = 25; // add series of docs with terms of decreasing df Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); for (int i = 0; i < numDocs; i++) { addDoc(writer, generateStrSeq(0, i + 1)); } IndexReader reader = writer.getReader(); writer.close(); // setup MLT query MoreLikeThis mlt = new MoreLikeThis(reader); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); mlt.setAnalyzer(analyzer); mlt.setMaxQueryTerms(topN); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text"}); // perform MLT query String likeText = ""; for (String text : generateStrSeq(0, numDocs)) { likeText += text + " "; } BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText)); // check best terms are topN of highest idf Collection<BooleanClause> clauses = query.clauses(); assertEquals("Expected" + topN + "clauses only!", topN, clauses.size()); Term[] expectedTerms = new Term[topN]; int idx = 0; for (String text : generateStrSeq(numDocs - topN, topN)) { expectedTerms[idx++] = new Term("text", text); } for (BooleanClause clause : clauses) { Term term = ((TermQuery) clause.getQuery()).getTerm(); assertTrue(Arrays.asList(expectedTerms).contains(term)); } // clean up reader.close(); dir.close(); analyzer.close(); }
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161") public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception { IndexReader reader = null; Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); try { int maxQueryTerms = 25; String[] itShopItemForSale = new String[] { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" }; String[] itShopItemNotForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"}; String[] clothesShopItemForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"}; String[] clothesShopItemNotForSale = new String[] { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" }; // add series of shop docs RandomIndexWriter writer = new RandomIndexWriter(random(), dir); for (int i = 0; i < 300; i++) { addShopDoc(writer, "it", itShopItemForSale, itShopItemNotForSale); } for (int i = 0; i < 300; i++) { addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale); } // Input Document is a clothes shop int inputDocId = addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale); reader = writer.getReader(); writer.close(); // setup MLT query MoreLikeThis mlt = new MoreLikeThis(reader); mlt.setAnalyzer(analyzer); mlt.setMaxQueryTerms(maxQueryTerms); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {FOR_SALE, NOT_FOR_SALE}); // perform MLT query BooleanQuery query = (BooleanQuery) mlt.like(inputDocId); Collection<BooleanClause> clauses = query.clauses(); Collection<BooleanClause> expectedClothesShopClauses = new ArrayList<BooleanClause>(); for (String itemForSale : clothesShopItemForSale) { BooleanClause booleanClause = new BooleanClause( new TermQuery(new Term(FOR_SALE, itemForSale)), BooleanClause.Occur.SHOULD); expectedClothesShopClauses.add(booleanClause); } for (String itemNotForSale : clothesShopItemNotForSale) { BooleanClause booleanClause = new BooleanClause( new TermQuery(new Term(NOT_FOR_SALE, itemNotForSale)), BooleanClause.Occur.SHOULD); expectedClothesShopClauses.add(booleanClause); } for (BooleanClause expectedClause : expectedClothesShopClauses) { assertTrue(clauses.contains(expectedClause)); } } finally { // clean up if (reader != null) { reader.close(); } dir.close(); analyzer.close(); } }