@Test public void testAnyOnArrayLiteral() throws Exception { Reference ref = createReference("d", DataTypes.STRING); Literal stringArrayLiteral = Literal.newLiteral( new Object[] {new BytesRef("a"), new BytesRef("b"), new BytesRef("c")}, new ArrayType(DataTypes.STRING)); // col != ANY (1,2,3) Query neqQuery = convert(whereClause(AnyNeqOperator.NAME, ref, stringArrayLiteral)); assertThat(neqQuery, instanceOf(FilteredQuery.class)); assertThat(((FilteredQuery) neqQuery).getFilter(), instanceOf(BooleanFilter.class)); BooleanFilter filter = (BooleanFilter) ((FilteredQuery) neqQuery).getFilter(); assertThat(filter.toString(), is("BooleanFilter(-BooleanFilter(+d:a +d:b +d:c))")); // col like any (1,2,3) Query likeQuery = convert(whereClause(AnyLikeOperator.NAME, ref, stringArrayLiteral)); assertThat(likeQuery, instanceOf(BooleanQuery.class)); BooleanQuery likeBQuery = (BooleanQuery) likeQuery; assertThat(likeBQuery.clauses().size(), is(3)); for (int i = 0; i < 2; i++) { // like --> XConstantScoreQuery with regexp-filter Query filteredQuery = likeBQuery.clauses().get(i).getQuery(); assertThat(filteredQuery, instanceOf(XConstantScoreQuery.class)); assertThat(((XConstantScoreQuery) filteredQuery).getFilter(), instanceOf(RegexpFilter.class)); } // col not like any (1,2,3) Query notLikeQuery = convert(whereClause(AnyNotLikeOperator.NAME, ref, stringArrayLiteral)); assertThat(notLikeQuery, instanceOf(BooleanQuery.class)); BooleanQuery notLikeBQuery = (BooleanQuery) notLikeQuery; assertThat(notLikeBQuery.clauses(), hasSize(1)); BooleanClause clause = notLikeBQuery.clauses().get(0); assertThat(clause.getOccur(), is(BooleanClause.Occur.MUST_NOT)); assertThat(((BooleanQuery) clause.getQuery()).clauses(), hasSize(3)); for (BooleanClause innerClause : ((BooleanQuery) clause.getQuery()).clauses()) { assertThat(innerClause.getOccur(), is(BooleanClause.Occur.MUST)); assertThat(innerClause.getQuery(), instanceOf(XConstantScoreQuery.class)); assertThat( ((XConstantScoreQuery) 
innerClause.getQuery()).getFilter(), instanceOf(RegexpFilter.class)); } // col < any (1,2,3) Query ltQuery2 = convert(whereClause(AnyLtOperator.NAME, ref, stringArrayLiteral)); assertThat(ltQuery2, instanceOf(BooleanQuery.class)); BooleanQuery ltBQuery = (BooleanQuery) ltQuery2; assertThat(ltBQuery.toString(), is("(d:{* TO a} d:{* TO b} d:{* TO c})~1")); }
public void testNormalization() throws IOException { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); GeoBoundingBoxQueryBuilder qb = createTestQueryBuilder(); if (getCurrentTypes().length != 0 && "mapped_geo".equals(qb.fieldName())) { // only execute this test if we are running on a valid geo field qb.setCorners(200, 200, qb.bottomRight().getLat(), qb.bottomRight().getLon()); qb.setValidationMethod(GeoValidationMethod.COERCE); Query query = qb.toQuery(createShardContext()); if (query instanceof ConstantScoreQuery) { ConstantScoreQuery result = (ConstantScoreQuery) query; BooleanQuery bboxFilter = (BooleanQuery) result.getQuery(); for (BooleanClause clause : bboxFilter.clauses()) { LegacyNumericRangeQuery boundary = (LegacyNumericRangeQuery) clause.getQuery(); if (boundary.getMax() != null) { assertTrue( "If defined, non of the maximum range values should be larger than 180", boundary.getMax().intValue() <= 180); } } } else { assertTrue( "memory queries should result in LegacyInMemoryGeoBoundingBoxQuery", query instanceof LegacyInMemoryGeoBoundingBoxQuery); } } }
// LUCENE-5725 public void testMultiValues() throws Exception { MoreLikeThis mlt = new MoreLikeThis(reader); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); mlt.setAnalyzer(analyzer); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text"}); BooleanQuery query = (BooleanQuery) mlt.like( "text", new StringReader("lucene"), new StringReader("lucene release"), new StringReader("apache"), new StringReader("apache lucene")); Collection<BooleanClause> clauses = query.clauses(); assertEquals("Expected 2 clauses only!", 2, clauses.size()); for (BooleanClause clause : clauses) { Term term = ((TermQuery) clause.getQuery()).getTerm(); assertTrue( Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term)); } analyzer.close(); }
/**
 * Builds a common-terms query for the given text.
 *
 * <p>The text is first turned into a query via {@code createBooleanQuery}. If that result is a
 * {@link BooleanQuery} whose clauses are all {@link TermQuery} instances, their terms are
 * collected into an {@code ExtendedCommonTermsQuery}; in every other case the result of
 * {@code createBooleanQuery} is returned unchanged (possibly {@code null}).
 */
public Query createCommonTermsQuery(
    String field,
    String queryText,
    Occur highFreqOccur,
    Occur lowFreqOccur,
    float maxTermFrequency,
    MappedFieldType fieldType) {
  Query parsed = createBooleanQuery(field, queryText, lowFreqOccur);
  // instanceof is false for null, so this also covers a null result.
  if (!(parsed instanceof BooleanQuery)) {
    return parsed;
  }

  BooleanQuery bq = (BooleanQuery) parsed;
  ExtendedCommonTermsQuery commonTerms =
      new ExtendedCommonTermsQuery(
          highFreqOccur, lowFreqOccur, maxTermFrequency, bq.isCoordDisabled(), fieldType);
  for (BooleanClause clause : bq.clauses()) {
    Query inner = clause.getQuery();
    if (inner instanceof TermQuery) {
      commonTerms.add(((TermQuery) inner).getTerm());
    } else {
      // A non-term clause cannot be represented; fall back to the plain query.
      return parsed;
    }
  }
  return commonTerms;
}
/**
 * Searches the index of the given company.
 *
 * <p>The query requires the portlet id, optionally the group id (when positive), at least one of
 * the category ids (when any are given) and adds the thread id (when positive). Non-null
 * keywords are added against the user name, title, content and tag entry fields. Any exception
 * is handed to {@code LuceneUtil.closeSearcher} together with the searcher and the keywords.
 */
public Hits search(
    long companyId, long groupId, long[] categoryIds, long threadId, String keywords)
    throws SystemException {
  Searcher searcher = null;

  try {
    HitsImpl hits = new HitsImpl();

    // Context part: restricts where to search.
    BooleanQuery contextQuery = new BooleanQuery();
    LuceneUtil.addRequiredTerm(contextQuery, LuceneFields.PORTLET_ID, Indexer.PORTLET_ID);

    if (groupId > 0) {
      LuceneUtil.addRequiredTerm(contextQuery, LuceneFields.GROUP_ID, groupId);
    }

    boolean hasCategories = (categoryIds != null) && (categoryIds.length > 0);
    if (hasCategories) {
      BooleanQuery categoryIdsQuery = new BooleanQuery();
      for (long categoryId : categoryIds) {
        Term term = new Term("categoryId", String.valueOf(categoryId));
        categoryIdsQuery.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
      }
      contextQuery.add(categoryIdsQuery, BooleanClause.Occur.MUST);
    }

    if (threadId > 0) {
      LuceneUtil.addTerm(contextQuery, "threadId", threadId);
    }

    // Keyword part: what to search for.
    BooleanQuery searchQuery = new BooleanQuery();
    if (Validator.isNotNull(keywords)) {
      LuceneUtil.addTerm(searchQuery, LuceneFields.USER_NAME, keywords);
      LuceneUtil.addTerm(searchQuery, LuceneFields.TITLE, keywords);
      LuceneUtil.addTerm(searchQuery, LuceneFields.CONTENT, keywords);
      LuceneUtil.addTerm(searchQuery, LuceneFields.TAG_ENTRY, keywords);
    }

    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.add(contextQuery, BooleanClause.Occur.MUST);
    if (!searchQuery.clauses().isEmpty()) {
      fullQuery.add(searchQuery, BooleanClause.Occur.MUST);
    }

    searcher = LuceneUtil.getSearcher(companyId);
    hits.recordHits(searcher.search(fullQuery), searcher);
    return hits;
  } catch (Exception e) {
    return LuceneUtil.closeSearcher(searcher, keywords, e);
  }
}
@Override public Query rewrite(IndexReader r) throws IOException { if (query.clauses().size() == 0) { // baseQuery given to the ctor was null + no drill-downs were added // note that if only baseQuery was given to the ctor, but no drill-down terms // is fine, since the rewritten query will be the original base query. throw new IllegalStateException("no base query or drill-down categories given"); } return query; }
public void testMinShouldMatchFilterWithoutShouldClauses() throws Exception { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.filter(new BoolQueryBuilder().must(new MatchAllQueryBuilder())); Query query = boolQueryBuilder.toQuery(createShardContext()); assertThat(query, instanceOf(BooleanQuery.class)); BooleanQuery booleanQuery = (BooleanQuery) query; assertThat(booleanQuery.getMinimumNumberShouldMatch(), equalTo(0)); assertThat(booleanQuery.clauses().size(), equalTo(1)); BooleanClause booleanClause = booleanQuery.clauses().get(0); assertThat(booleanClause.getOccur(), equalTo(BooleanClause.Occur.FILTER)); assertThat(booleanClause.getQuery(), instanceOf(BooleanQuery.class)); BooleanQuery innerBooleanQuery = (BooleanQuery) booleanClause.getQuery(); // we didn't set minimum should match initially, there are no should clauses so it should be 0 assertThat(innerBooleanQuery.getMinimumNumberShouldMatch(), equalTo(0)); assertThat(innerBooleanQuery.clauses().size(), equalTo(1)); BooleanClause innerBooleanClause = innerBooleanQuery.clauses().get(0); assertThat(innerBooleanClause.getOccur(), equalTo(BooleanClause.Occur.MUST)); assertThat(innerBooleanClause.getQuery(), instanceOf(MatchAllDocsQuery.class)); }
/**
 * Wraps "foo bar" in a random number of nested, boosted parenthesis groups, adds a boost on the
 * builder itself, and checks that the parsed query is the matching chain of {@code BoostQuery}
 * wrappers (outermost first) around a two-clause should query.
 */
public void testToQueryBooleanQueryMultipleBoosts() throws Exception {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
  int numBoosts = randomIntBetween(2, 10);
  // One slot per nested group plus a final slot for the builder-level boost.
  float[] boosts = new float[numBoosts + 1];
  StringBuilder opening = new StringBuilder();
  StringBuilder closing = new StringBuilder();
  for (int i = 0; i < numBoosts; i++) {
    float boost = 2.0f / randomIntBetween(3, 20);
    boosts[i] = boost;
    opening.append("(");
    closing.append(")^").append(boost);
  }
  String queryString = opening + "foo bar" + closing;

  float mainBoost = 2.0f / randomIntBetween(3, 20);
  boosts[boosts.length - 1] = mainBoost;

  QueryStringQueryBuilder queryStringQueryBuilder =
      new QueryStringQueryBuilder(queryString)
          .field(STRING_FIELD_NAME)
          .minimumShouldMatch("2")
          .boost(mainBoost);
  Query query = queryStringQueryBuilder.toQuery(createShardContext());

  // Peel the boost wrappers off from the outside in.
  int remaining = boosts.length;
  while (remaining > 0) {
    remaining--;
    assertThat(query, instanceOf(BoostQuery.class));
    BoostQuery boostQuery = (BoostQuery) query;
    assertThat(boostQuery.getBoost(), equalTo(boosts[remaining]));
    query = boostQuery.getQuery();
  }

  assertThat(query, instanceOf(BooleanQuery.class));
  BooleanQuery booleanQuery = (BooleanQuery) query;
  assertThat(booleanQuery.getMinimumNumberShouldMatch(), equalTo(2));

  BooleanClause fooClause = booleanQuery.clauses().get(0);
  assertThat(fooClause.getOccur(), equalTo(BooleanClause.Occur.SHOULD));
  assertThat(fooClause.getQuery(), equalTo(new TermQuery(new Term(STRING_FIELD_NAME, "foo"))));

  BooleanClause barClause = booleanQuery.clauses().get(1);
  assertThat(barClause.getOccur(), equalTo(BooleanClause.Occur.SHOULD));
  assertThat(barClause.getQuery(), equalTo(new TermQuery(new Term(STRING_FIELD_NAME, "bar"))));
}
/**
 * Converts a Lucene boolean query into a JSON object of the form {@code {"bool": {...}}}.
 *
 * <p>Clauses are grouped by occur type under the keys {@code must}, {@code should},
 * {@code must_not} and {@code filter} (added in that order). A group with exactly one converted
 * clause is stored as that single element, a group with several as an array, and an empty group
 * is omitted.
 */
private static JsonObject convertBooleanQuery(BooleanQuery booleanQuery) {
  JsonArray musts = new JsonArray();
  JsonArray shoulds = new JsonArray();
  JsonArray mustNots = new JsonArray();
  JsonArray filters = new JsonArray();

  for (BooleanClause clause : booleanQuery.clauses()) {
    switch (clause.getOccur()) {
      case MUST:
        musts.add(fromLuceneQuery(clause.getQuery()));
        break;
      case SHOULD:
        shoulds.add(fromLuceneQuery(clause.getQuery()));
        break;
      case MUST_NOT:
        mustNots.add(fromLuceneQuery(clause.getQuery()));
        break;
      case FILTER:
        filters.add(fromLuceneQuery(clause.getQuery()));
        break;
    }
  }

  JsonObject clauses = new JsonObject();
  addSingleOrArray(clauses, "must", musts);
  addSingleOrArray(clauses, "should", shoulds);
  addSingleOrArray(clauses, "must_not", mustNots);
  addSingleOrArray(clauses, "filter", filters);

  JsonObject bool = new JsonObject();
  bool.add("bool", clauses);
  return bool;
}

/** Adds {@code values} under {@code key}: unwrapped if single, as array if several, not at all if empty. */
private static void addSingleOrArray(JsonObject target, String key, JsonArray values) {
  int count = values.size();
  if (count == 1) {
    target.add(key, values.iterator().next());
  } else if (count > 1) {
    target.add(key, values);
  }
}
public static SolrReaderSetScorer createReaderSetScorer( Weight weight, AtomicReaderContext context, Bits acceptDocs, SolrIndexSearcher searcher, String authorities, AtomicReader reader) throws IOException { DocSet readableDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities); if (readableDocSet == null) { String[] auths = authorities.substring(1).split(authorities.substring(0, 1)); readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc())); BooleanQuery bQuery = new BooleanQuery(); for (String current : auths) { bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD); } DocSet aclDocs = searcher.getDocSet(bQuery); BooleanQuery aQuery = new BooleanQuery(); for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/ ) { int docID = it.nextDoc(); // Obtain the ACL ID for this ACL doc. long aclID = searcher.getAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID).get(docID); SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID); Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID)); aQuery.add(query, Occur.SHOULD); if ((aQuery.clauses().size() > 999) || !it.hasNext()) { DocSet docsForAclId = searcher.getDocSet(aQuery); readableDocSet = readableDocSet.union(docsForAclId); aQuery = new BooleanQuery(); } } // Exclude the ACL docs from the results, we only want real docs that match. // Probably not very efficient, what we really want is remove(docID) readableDocSet = readableDocSet.andNot(aclDocs); searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet); } // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, // authorities, readableDocSet) // plus check of course, for presence in cache at start of method. return new SolrReaderSetScorer(weight, readableDocSet, context, acceptDocs, searcher); }
public void testTopN() throws Exception { int numDocs = 100; int topN = 25; // add series of docs with terms of decreasing df Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); for (int i = 0; i < numDocs; i++) { addDoc(writer, generateStrSeq(0, i + 1)); } IndexReader reader = writer.getReader(); writer.close(); // setup MLT query MoreLikeThis mlt = new MoreLikeThis(reader); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); mlt.setAnalyzer(analyzer); mlt.setMaxQueryTerms(topN); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text"}); // perform MLT query String likeText = ""; for (String text : generateStrSeq(0, numDocs)) { likeText += text + " "; } BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText)); // check best terms are topN of highest idf Collection<BooleanClause> clauses = query.clauses(); assertEquals("Expected" + topN + "clauses only!", topN, clauses.size()); Term[] expectedTerms = new Term[topN]; int idx = 0; for (String text : generateStrSeq(numDocs - topN, topN)) { expectedTerms[idx++] = new Term("text", text); } for (BooleanClause clause : clauses) { Term term = ((TermQuery) clause.getQuery()).getTerm(); assertTrue(Arrays.asList(expectedTerms).contains(term)); } // clean up reader.close(); dir.close(); analyzer.close(); }
/**
 * A query string against the field pattern {@code mapped_str*} with dis-max disabled must
 * expand into a boolean query with one term clause per matching string field.
 */
public void testToQueryFieldsWildcard() throws Exception {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  Query query =
      queryStringQuery("test")
          .field("mapped_str*")
          .useDisMax(false)
          .toQuery(createShardContext());

  assertThat(query, instanceOf(BooleanQuery.class));
  BooleanQuery bQuery = (BooleanQuery) query;
  assertThat(bQuery.clauses().size(), equalTo(2));

  TermQuery firstField = assertBooleanSubQuery(query, TermQuery.class, 0);
  assertThat(firstField.getTerm(), equalTo(new Term(STRING_FIELD_NAME, "test")));

  TermQuery secondField = assertBooleanSubQuery(query, TermQuery.class, 1);
  assertThat(secondField.getTerm(), equalTo(new Term(STRING_FIELD_NAME_2, "test")));
}
public void testBoostFactor() throws Throwable { Map<String, Float> originalValues = getOriginalValues(); MoreLikeThis mlt = new MoreLikeThis(reader); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); mlt.setAnalyzer(analyzer); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text"}); mlt.setBoost(true); // this mean that every term boost factor will be multiplied by this // number float boostFactor = 5; mlt.setBoostFactor(boostFactor); BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release")); Collection<BooleanClause> clauses = query.clauses(); assertEquals( "Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size()); for (BooleanClause clause : clauses) { BoostQuery bq = (BoostQuery) clause.getQuery(); TermQuery tq = (TermQuery) bq.getQuery(); Float termBoost = originalValues.get(tq.getTerm().text()); assertNotNull("Expected term " + tq.getTerm().text(), termBoost); float totalBoost = termBoost * boostFactor; assertEquals( "Expected boost of " + totalBoost + " for term '" + tq.getTerm().text() + "' got " + bq.getBoost(), totalBoost, bq.getBoost(), 0.0001); } analyzer.close(); }
/**
 * Runs a boosted MoreLikeThis query for "lucene release" (without a custom boost factor) and
 * returns the boost of each resulting term, keyed by term text.
 */
private Map<String, Float> getOriginalValues() throws IOException {
  Map<String, Float> boostsByTerm = new HashMap<>();

  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);

  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
  for (BooleanClause clause : query.clauses()) {
    BoostQuery boosted = (BoostQuery) clause.getQuery();
    TermQuery termQuery = (TermQuery) boosted.getQuery();
    boostsByTerm.put(termQuery.getTerm().text(), boosted.getBoost());
  }

  analyzer.close();
  return boostsByTerm;
}
static void assertLateParsingQuery(Query query, String type, String id) throws IOException { assertThat(query, instanceOf(HasChildQueryBuilder.LateParsingQuery.class)); HasChildQueryBuilder.LateParsingQuery lateParsingQuery = (HasChildQueryBuilder.LateParsingQuery) query; assertThat(lateParsingQuery.getInnerQuery(), instanceOf(BooleanQuery.class)); BooleanQuery booleanQuery = (BooleanQuery) lateParsingQuery.getInnerQuery(); assertThat(booleanQuery.clauses().size(), equalTo(2)); // check the inner ids query, we have to call rewrite to get to check the type it's executed // against assertThat(booleanQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST)); assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(TermsQuery.class)); TermsQuery termsQuery = (TermsQuery) booleanQuery.clauses().get(0).getQuery(); Query rewrittenTermsQuery = termsQuery.rewrite(null); assertThat(rewrittenTermsQuery, instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) rewrittenTermsQuery; assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class)); BooleanQuery booleanTermsQuery = (BooleanQuery) constantScoreQuery.getQuery(); assertThat(booleanTermsQuery.clauses().size(), equalTo(1)); assertThat(booleanTermsQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.SHOULD)); assertThat(booleanTermsQuery.clauses().get(0).getQuery(), instanceOf(TermQuery.class)); TermQuery termQuery = (TermQuery) booleanTermsQuery.clauses().get(0).getQuery(); assertThat(termQuery.getTerm().field(), equalTo(UidFieldMapper.NAME)); // we want to make sure that the inner ids query gets executed against the child type rather // than the main type we initially set to the context BytesRef[] ids = Uid.createUidsForTypesAndIds( Collections.singletonList(type), Collections.singletonList(id)); assertThat(termQuery.getTerm().bytes(), equalTo(ids[0])); // check the type filter assertThat(booleanQuery.clauses().get(1).getOccur(), 
equalTo(BooleanClause.Occur.FILTER)); assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery typeConstantScoreQuery = (ConstantScoreQuery) booleanQuery.clauses().get(1).getQuery(); assertThat(typeConstantScoreQuery.getQuery(), instanceOf(TermQuery.class)); TermQuery typeTermQuery = (TermQuery) typeConstantScoreQuery.getQuery(); assertThat(typeTermQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME)); assertThat(typeTermQuery.getTerm().text(), equalTo(type)); }
/**
 * Checks the Lucene query produced for a bool query builder.
 *
 * <p>A builder without clauses, or whose clauses all convert to nothing, must yield a
 * {@code MatchAllDocsQuery}. Otherwise the result must be a {@code BooleanQuery} with the same
 * coord setting and the same number of clauses as expected, where a purely negative clause list
 * gains one extra MUST match-all clause when {@code adjustPureNegative} is set.
 */
@Override
protected void doAssertLuceneQuery(
    BoolQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
  if (!queryBuilder.hasClauses()) {
    assertThat(query, instanceOf(MatchAllDocsQuery.class));
    return;
  }

  List<BooleanClause> clauses = new ArrayList<>();
  clauses.addAll(getBooleanClauses(queryBuilder.must(), BooleanClause.Occur.MUST, context));
  clauses.addAll(
      getBooleanClauses(queryBuilder.mustNot(), BooleanClause.Occur.MUST_NOT, context));
  clauses.addAll(getBooleanClauses(queryBuilder.should(), BooleanClause.Occur.SHOULD, context));
  clauses.addAll(getBooleanClauses(queryBuilder.filter(), BooleanClause.Occur.FILTER, context));

  if (clauses.isEmpty()) {
    assertThat(query, instanceOf(MatchAllDocsQuery.class));
    return;
  }

  assertThat(query, instanceOf(BooleanQuery.class));
  BooleanQuery booleanQuery = (BooleanQuery) query;
  assertThat(booleanQuery.isCoordDisabled(), equalTo(queryBuilder.disableCoord()));

  if (queryBuilder.adjustPureNegative()) {
    // Look for the first non-prohibited clause; if there is none the query is purely negative.
    boolean foundPositive = false;
    Iterator<BooleanClause> scan = clauses.iterator();
    while (!foundPositive && scan.hasNext()) {
      foundPositive = !scan.next().isProhibited();
    }
    if (!foundPositive) {
      clauses.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
    }
  }

  assertThat(booleanQuery.clauses().size(), equalTo(clauses.size()));
  Iterator<BooleanClause> clauseIterator = clauses.iterator();
  for (BooleanClause booleanClause : booleanQuery.getClauses()) {
    BooleanClause expectedClause = clauseIterator.next();
    assertThat(booleanClause, instanceOf(expectedClause.getClass()));
  }
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161") public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception { IndexReader reader = null; Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); try { int maxQueryTerms = 25; String[] itShopItemForSale = new String[] { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" }; String[] itShopItemNotForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"}; String[] clothesShopItemForSale = new String[] {"tie", "trousers", "shoes", "skirt", "hat"}; String[] clothesShopItemNotForSale = new String[] { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" }; // add series of shop docs RandomIndexWriter writer = new RandomIndexWriter(random(), dir); for (int i = 0; i < 300; i++) { addShopDoc(writer, "it", itShopItemForSale, itShopItemNotForSale); } for (int i = 0; i < 300; i++) { addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale); } // Input Document is a clothes shop int inputDocId = addShopDoc(writer, "clothes", clothesShopItemForSale, clothesShopItemNotForSale); reader = writer.getReader(); writer.close(); // setup MLT query MoreLikeThis mlt = new MoreLikeThis(reader); mlt.setAnalyzer(analyzer); mlt.setMaxQueryTerms(maxQueryTerms); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {FOR_SALE, NOT_FOR_SALE}); // perform MLT query BooleanQuery query = (BooleanQuery) mlt.like(inputDocId); Collection<BooleanClause> clauses = query.clauses(); Collection<BooleanClause> expectedClothesShopClauses = new ArrayList<BooleanClause>(); for (String itemForSale : clothesShopItemForSale) { BooleanClause booleanClause = new BooleanClause( new TermQuery(new Term(FOR_SALE, itemForSale)), BooleanClause.Occur.SHOULD); expectedClothesShopClauses.add(booleanClause); } for (String 
itemNotForSale : clothesShopItemNotForSale) { BooleanClause booleanClause = new BooleanClause( new TermQuery(new Term(NOT_FOR_SALE, itemNotForSale)), BooleanClause.Occur.SHOULD); expectedClothesShopClauses.add(booleanClause); } for (BooleanClause expectedClause : expectedClothesShopClauses) { assertTrue(clauses.contains(expectedClause)); } } finally { // clean up if (reader != null) { reader.close(); } dir.close(); analyzer.close(); } }
/** * Construct the query (using spans). This method will be called recursively. * * @param q Query * @param scope Author, affiliation, or Reference * @param level Used only for formatting (indentation) the level of recursion * @param andSpans ArrayList of Spans that should be 'and' * @param orSpans ArrayList of Spans that should be 'or' * @param notSpans ArrayList of Spans that should be 'not' * @return SpanQuery */ private SpanQuery buildQuery_recursive( Query q, String scope, int level, ArrayList<SpanQuery> andSpans, ArrayList<SpanQuery> orSpans, ArrayList<SpanQuery> notSpans) { BooleanQuery castQuery = (BooleanQuery) q; String subscope = null; for (BooleanClause clause : castQuery.getClauses()) { Class queryclazz = clause.getQuery().getClass(); System.out.println( repeat(' ', level) + "[" + queryclazz + "][" + clause.getOccur() + "] " + clause.toString()); if (queryclazz == BooleanQuery.class) { System.out.println("Number of Clauses is " + castQuery.clauses().size()); System.out.println("Minimum Number to Match is " + castQuery.getMinimumNumberShouldMatch()); if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, 
getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == TermQuery.class) { TermQuery tq = (TermQuery) clause.getQuery(); if (tq.getTerm().field().compareTo(SUBSCOPE_FIELD) == 0) { // Set the subscope subscope = tq.getTerm().text(); // Need to add a term here (otherwise we have problems) WildcardQuery wildcard = new WildcardQuery(new Term(scope, "*")); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard)); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard)); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { notSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard)); } } else if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildTermQuery(scope, tq)); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildTermQuery(scope, tq)); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { notSpans.add(buildTermQuery(scope, tq)); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, 
TAG_TYPE.END_TAG)))); subscopeQuery.add(buildTermQuery(scope, tq)); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == WildcardQuery.class) { if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildWildcardQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildWildcardQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add(buildWildcardQuery(scope, clause.getQuery())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add(buildWildcardQuery(scope, clause.getQuery())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == PrefixQuery.class) { if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildPrefixQuery(scope, clause.getQuery())); } else if 
(clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildPrefixQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add(buildPrefixQuery(scope, clause.getQuery())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add(buildPrefixQuery(scope, clause.getQuery())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == PhraseQuery.class) { if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildPhraseQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildPhraseQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add(buildPhraseQuery(scope, clause.getQuery())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add(buildPhraseQuery(scope, clause.getQuery())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() 
== BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else { System.out.println("[" + q.getClass() + "]"); } } ArrayList<SpanQuery> includeSpans = new ArrayList<SpanQuery>(); ; // Add the 'and' queries to the includeSpans (if there were any) if (!andSpans.isEmpty()) { if (andSpans.size() > 1) { includeSpans.add(new SpanAndQuery(andSpans.toArray(new SpanQuery[andSpans.size()]))); } else { includeSpans.add(andSpans.get(0)); } } // Add the 'or' queries to the includeSpans (if there were any) if (!orSpans.isEmpty()) { includeSpans.add(new SpanOrQuery(orSpans.toArray(new SpanQuery[orSpans.size()]))); } // Exclude the 'not' queries from the includeSpans (if there were any) if (!notSpans.isEmpty()) { if (includeSpans.size() > 1) { if (notSpans.size() > 1) { return new SpanNotQuery( new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])), new SpanAndQuery(notSpans.toArray(new SpanQuery[notSpans.size()]))); } else { return new SpanNotQuery( new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])), notSpans.get(0)); } } else { if (notSpans.size() > 1) { return new SpanNotQuery( includeSpans.get(0), new SpanAndQuery(notSpans.toArray(new SpanQuery[notSpans.size()]))); } else { return new SpanNotQuery(includeSpans.get(0), notSpans.get(0)); } } } else { if (includeSpans.size() > 1) { return new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])); } else { return includeSpans.get(0); } } }
/** * Process properties to query sparse content directly. * * @param request * @param query * @param asAnon * @return * @throws StorageClientException * @throws AccessDeniedException */ public SolrSearchResultSet processQuery( SlingHttpServletRequest request, Query query, boolean asAnon) throws SolrSearchException { try { // use solr parsing to get the terms from the query string QueryParser parser = new QueryParser(Version.LUCENE_40, "id", new TextField().getQueryAnalyzer()); org.apache.lucene.search.Query luceneQuery = parser.parse(query.getQueryString()); Map<String, Object> props = Maps.newHashMap(); if (luceneQuery instanceof BooleanQuery) { BooleanQuery boolLucQuery = (BooleanQuery) luceneQuery; int orCount = 0; List<BooleanClause> clauses = boolLucQuery.clauses(); for (BooleanClause clause : clauses) { org.apache.lucene.search.Query clauseQuery = clause.getQuery(); Map<String, Object> subOrs = Maps.newHashMap(); // we support 1 level of nesting for OR clauses if (clauseQuery instanceof BooleanQuery) { for (BooleanClause subclause : ((BooleanQuery) clauseQuery).clauses()) { org.apache.lucene.search.Query subclauseQuery = subclause.getQuery(); extractTerms(subclause, subclauseQuery, props, subOrs); } props.put("orset" + orCount, subOrs); orCount++; } else { extractTerms(clause, clauseQuery, props, subOrs); if (!subOrs.isEmpty()) { props.put("orset" + orCount, subOrs); orCount++; } } } } else { extractTerms(null, luceneQuery, props, null); } // add the options to the parameters but prepend _ to avoid collision for (Entry<String, String> option : query.getOptions().entrySet()) { props.put("_" + option.getKey(), option.getValue()); } Session session = StorageClientUtils.adaptToSession( request.getResourceResolver().adaptTo(javax.jcr.Session.class)); ContentManager cm = session.getContentManager(); long tquery = System.currentTimeMillis(); Iterable<Content> items = cm.find(props); tquery = System.currentTimeMillis() - tquery; try { if (tquery > 
verySlowQueryThreshold) { SLOW_QUERY_LOGGER.error( "Very slow sparse query {} ms {} ", tquery, URLDecoder.decode(query.toString(), "UTF-8")); } else if (tquery > slowQueryThreshold) { SLOW_QUERY_LOGGER.warn( "Slow sparse query {} ms {} ", tquery, URLDecoder.decode(query.toString(), "UTF-8")); } } catch (UnsupportedEncodingException e) { // quietly swallow this exception LOGGER.debug(e.getLocalizedMessage(), e); } SolrSearchResultSet rs = new SparseSearchResultSet(items, defaultMaxResults); return rs; } catch (AccessDeniedException e) { throw new SolrSearchException(500, e.getMessage()); } catch (StorageClientException e) { throw new SolrSearchException(500, e.getMessage()); } catch (ParseException e) { throw new SolrSearchException(500, e.getMessage()); } }
/** @see #toString(Query,IndexSchema) */ public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException { boolean writeBoost = true; if (query instanceof TermQuery) { TermQuery q = (TermQuery) query; Term t = q.getTerm(); FieldType ft = writeFieldName(t.field(), schema, out, flags); writeFieldVal(t.bytes(), ft, out, flags); } else if (query instanceof TermRangeQuery) { TermRangeQuery q = (TermRangeQuery) query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesLower() ? '[' : '{'); BytesRef lt = q.getLowerTerm(); BytesRef ut = q.getUpperTerm(); if (lt == null) { out.append('*'); } else { writeFieldVal(lt, ft, out, flags); } out.append(" TO "); if (ut == null) { out.append('*'); } else { writeFieldVal(ut, ft, out, flags); } out.append(q.includesUpper() ? ']' : '}'); } else if (query instanceof NumericRangeQuery) { NumericRangeQuery q = (NumericRangeQuery) query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesMin() ? '[' : '{'); Number lt = q.getMin(); Number ut = q.getMax(); if (lt == null) { out.append('*'); } else { out.append(lt.toString()); } out.append(" TO "); if (ut == null) { out.append('*'); } else { out.append(ut.toString()); } out.append(q.includesMax() ? ']' : '}'); } else if (query instanceof BooleanQuery) { BooleanQuery q = (BooleanQuery) query; boolean needParens = false; if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) { needParens = true; } if (needParens) { out.append('('); } boolean first = true; for (BooleanClause c : q.clauses()) { if (!first) { out.append(' '); } else { first = false; } if (c.isProhibited()) { out.append('-'); } else if (c.isRequired()) { out.append('+'); } Query subQuery = c.getQuery(); boolean wrapQuery = false; // TODO: may need to put parens around other types // of queries too, depending on future syntax. 
if (subQuery instanceof BooleanQuery) { wrapQuery = true; } if (wrapQuery) { out.append('('); } toString(subQuery, schema, out, flags); if (wrapQuery) { out.append(')'); } } if (needParens) { out.append(')'); } if (q.getMinimumNumberShouldMatch() > 0) { out.append('~'); out.append(Integer.toString(q.getMinimumNumberShouldMatch())); } if (q.isCoordDisabled()) { out.append("/no_coord"); } } else if (query instanceof PrefixQuery) { PrefixQuery q = (PrefixQuery) query; Term prefix = q.getPrefix(); FieldType ft = writeFieldName(prefix.field(), schema, out, flags); out.append(prefix.text()); out.append('*'); } else if (query instanceof WildcardQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof FuzzyQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof ConstantScoreQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof WrappedQuery) { WrappedQuery q = (WrappedQuery) query; out.append(q.getOptions()); toString(q.getWrappedQuery(), schema, out, flags); writeBoost = false; // we don't use the boost on wrapped queries } else { out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')'); writeBoost = false; } if (writeBoost && query.getBoost() != 1.0f) { out.append("^"); out.append(Float.toString(query.getBoost())); } }