// LUCENE-5725
public void testMultiValues() throws Exception {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});

  BooleanQuery query =
      (BooleanQuery)
          mlt.like(
              "text",
              new StringReader("lucene"),
              new StringReader("lucene release"),
              new StringReader("apache"),
              new StringReader("apache lucene"));
  Collection<BooleanClause> clauses = query.clauses();
  assertEquals("Expected 2 clauses only!", 2, clauses.size());
  for (BooleanClause clause : clauses) {
    Term term = ((TermQuery) clause.getQuery()).getTerm();
    assertTrue(
        Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
  }
  analyzer.close();
}
private MoreLikeThis getMoreLikeThis() {
  MoreLikeThis mlt = new MoreLikeThis(reader); // pass the index reader
  mlt.setMaxDocFreqPct(maxPercentage);
  mlt.setMaxQueryTerms(maxQueryTerms);
  mlt.setMinDocFreq(minDocFreq);
  mlt.setMinTermFreq(minTermFreq);
  mlt.setAnalyzer(analyzer);
  mlt.setFieldNames(new String[] {"text"}); // specify the fields used for similarity
  return mlt;
}
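// A minimal usage sketch for the helper above (the method name and the result
// cap of 10 are hypothetical, not from the original suite). It assumes the
// class-level reader, analyzer, maxPercentage, maxQueryTerms, minDocFreq and
// minTermFreq fields are initialized in setUp(), and that
// org.apache.lucene.search.IndexSearcher and ScoreDoc are imported.
public void testGetMoreLikeThisSketch() throws Exception {
  MoreLikeThis mlt = getMoreLikeThis();
  // build the MLT query from free text against the "text" field
  Query query = mlt.like("text", new StringReader("lucene release"));
  // run it with a plain IndexSearcher over the same reader
  IndexSearcher searcher = new IndexSearcher(reader);
  ScoreDoc[] hits = searcher.search(query, 10).scoreDocs;
  assertNotNull(hits); // the generated query should at least be executable
}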
// LUCENE-3326
public void testMultiFields() throws Exception {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text", "foobar"});
  mlt.like("foobar", new StringReader("this is a test"));
  analyzer.close();
}
public void testTopN() throws Exception {
  int numDocs = 100;
  int topN = 25;

  // add a series of docs with terms of decreasing df
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  for (int i = 0; i < numDocs; i++) {
    addDoc(writer, generateStrSeq(0, i + 1));
  }
  IndexReader reader = writer.getReader();
  writer.close();

  // set up the MLT query
  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMaxQueryTerms(topN);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});

  // perform the MLT query
  String likeText = "";
  for (String text : generateStrSeq(0, numDocs)) {
    likeText += text + " ";
  }
  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));

  // check that the best terms are the topN terms with the highest idf
  Collection<BooleanClause> clauses = query.clauses();
  assertEquals("Expected " + topN + " clauses only!", topN, clauses.size());

  Term[] expectedTerms = new Term[topN];
  int idx = 0;
  for (String text : generateStrSeq(numDocs - topN, topN)) {
    expectedTerms[idx++] = new Term("text", text);
  }
  for (BooleanClause clause : clauses) {
    Term term = ((TermQuery) clause.getQuery()).getTerm();
    assertTrue(Arrays.asList(expectedTerms).contains(term));
  }

  // clean up
  reader.close();
  dir.close();
  analyzer.close();
}
public void testBoostFactor() throws Throwable {
  Map<String, Float> originalValues = getOriginalValues();
  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);

  // this means that every term's boost factor will be multiplied by this number
  float boostFactor = 5;
  mlt.setBoostFactor(boostFactor);

  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
  Collection<BooleanClause> clauses = query.clauses();

  assertEquals(
      "Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size());

  for (BooleanClause clause : clauses) {
    BoostQuery bq = (BoostQuery) clause.getQuery();
    TermQuery tq = (TermQuery) bq.getQuery();
    Float termBoost = originalValues.get(tq.getTerm().text());
    assertNotNull("Expected term " + tq.getTerm().text(), termBoost);

    float totalBoost = termBoost * boostFactor;
    assertEquals(
        "Expected boost of "
            + totalBoost
            + " for term '"
            + tq.getTerm().text()
            + "' got "
            + bq.getBoost(),
        totalBoost,
        bq.getBoost(),
        0.0001);
  }
  analyzer.close();
}
private Map<String, Float> getOriginalValues() throws IOException {
  Map<String, Float> originalValues = new HashMap<>();
  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);

  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
  Collection<BooleanClause> clauses = query.clauses();

  for (BooleanClause clause : clauses) {
    BoostQuery bq = (BoostQuery) clause.getQuery();
    TermQuery tq = (TermQuery) bq.getQuery();
    originalValues.put(tq.getTerm().text(), bq.getBoost());
  }
  analyzer.close();
  return originalValues;
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161")
public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception {
  IndexReader reader = null;
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  try {
    int maxQueryTerms = 25;

    String[] itShopItemForSale =
        new String[] {
          "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse",
          "speakers"
        };
    String[] itShopItemNotForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"};

    String[] clothesShopItemForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"};
    String[] clothesShopItemNotForSale =
        new String[] {
          "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse",
          "speakers"
        };

    // add a series of shop docs
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < 300; i++) {
      addShopDoc(writer, "it", itShopItemForSale, itShopItemNotForSale);
    }
    for (int i = 0; i < 300; i++) {
      addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
    }
    // the input document is a clothes shop
    int inputDocId =
        addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale);
    reader = writer.getReader();
    writer.close();

    // set up the MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {FOR_SALE, NOT_FOR_SALE});

    // perform the MLT query
    BooleanQuery query = (BooleanQuery) mlt.like(inputDocId);
    Collection<BooleanClause> clauses = query.clauses();

    Collection<BooleanClause> expectedClothesShopClauses = new ArrayList<BooleanClause>();
    for (String itemForSale : clothesShopItemForSale) {
      BooleanClause booleanClause =
          new BooleanClause(
              new TermQuery(new Term(FOR_SALE, itemForSale)), BooleanClause.Occur.SHOULD);
      expectedClothesShopClauses.add(booleanClause);
    }
    for (String itemNotForSale : clothesShopItemNotForSale) {
      BooleanClause booleanClause =
          new BooleanClause(
              new TermQuery(new Term(NOT_FOR_SALE, itemNotForSale)), BooleanClause.Occur.SHOULD);
      expectedClothesShopClauses.add(booleanClause);
    }

    for (BooleanClause expectedClause : expectedClothesShopClauses) {
      assertTrue(clauses.contains(expectedClause));
    }
  } finally {
    // clean up
    if (reader != null) {
      reader.close();
    }
    dir.close();
    analyzer.close();
  }
}