/**
 * Search, collecting hits with a {@link Collector}, and computing drill down and sideways counts.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public DrillSidewaysResult search(DrillDownQuery query, Collector hitCollector)
    throws IOException {
  Map<String, Integer> drillDownDims = query.getDims();

  FacetsCollector drillDownCollector = new FacetsCollector();

  if (drillDownDims.isEmpty()) {
    // There are no drill-down dims, so there is no
    // drill-sideways to compute:
    searcher.search(query, MultiCollector.wrap(hitCollector, drillDownCollector));
    return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, null, null), null);
  }

  BooleanQuery ddq = query.getBooleanQuery();
  BooleanClause[] clauses = ddq.getClauses();

  Query baseQuery;
  int startClause;
  if (clauses.length == drillDownDims.size()) {
    // TODO: we could optimize this pure-browse case by
    // making a custom scorer instead:
    baseQuery = new MatchAllDocsQuery();
    startClause = 0;
  } else {
    assert clauses.length == 1 + drillDownDims.size();
    baseQuery = clauses[0].getQuery();
    startClause = 1;
  }

  FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
  for (int i = 0; i < drillSidewaysCollectors.length; i++) {
    drillSidewaysCollectors[i] = new FacetsCollector();
  }

  Query[] drillDownQueries = new Query[clauses.length - startClause];
  for (int i = startClause; i < clauses.length; i++) {
    drillDownQueries[i - startClause] = clauses[i].getQuery();
  }
  DrillSidewaysQuery dsq =
      new DrillSidewaysQuery(
          baseQuery,
          drillDownCollector,
          drillSidewaysCollectors,
          drillDownQueries,
          scoreSubDocsAtOnce());
  searcher.search(dsq, hitCollector);

  return new DrillSidewaysResult(
      buildFacetsResult(
          drillDownCollector,
          drillSidewaysCollectors,
          drillDownDims.keySet().toArray(new String[drillDownDims.size()])),
      null);
}
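// A minimal sketch of driving the drill-sideways search above from caller code, assuming
// the FacetsConfig-based facet API of the same Lucene era; "config", "taxoReader" and the
// "Color"/"Red" dimension are illustrative assumptions, not taken from the snippet.
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
DrillDownQuery ddq = new DrillDownQuery(config); // no base query -> pure browse
ddq.add("Color", "Red");                         // hypothetical dimension / value
TotalHitCountCollector hitCount = new TotalHitCountCollector();
DrillSidewaysResult result = ds.search(ddq, hitCount);
// result carries the drill-down / drill-sideways facet counts; hitCount carries the hit total
System.out.println("hits: " + hitCount.getTotalHits());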
/**
 * Search with facets through the {@link AdaptiveFacetsAccumulator}
 *
 * @param indexDir Directory of the search index.
 * @param taxoDir Directory of the taxonomy index.
 * @throws Exception on error (no detailed exception handling here, for sample simplicity)
 * @return facet results
 */
public static List<FacetResult> searchWithFacets(Directory indexDir, Directory taxoDir)
    throws Exception {
  // prepare index reader and taxonomy.
  TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
  IndexReader indexReader = DirectoryReader.open(indexDir);

  // prepare searcher to search against
  IndexSearcher searcher = new IndexSearcher(indexReader);

  // faceted search works in two steps:
  // 1. collect matching documents
  // 2. aggregate facets for the collected documents and
  //    generate the requested faceted results from the aggregated facets

  // step 1: collect matching documents into a collector
  Query q = new TermQuery(new Term(SimpleUtils.TEXT, "white"));
  ExampleUtils.log("Query: " + q);

  // regular collector for scoring matched documents
  TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true);

  // docids collector for guiding facets accumulation (scoring disabled)
  ScoredDocIdCollector docIdsCollector =
      ScoredDocIdCollector.create(indexReader.maxDoc(), false);

  // Faceted search parameters indicate which facets we are interested in
  FacetSearchParams facetSearchParams = new FacetSearchParams();
  facetSearchParams.addFacetRequest(new CountFacetRequest(new CategoryPath("root", "a"), 10));

  // search into both collectors. Note: if only facet accumulation is required,
  // the topDocsCollector part can be discarded entirely
  searcher.search(q, MultiCollector.wrap(topDocsCollector, docIdsCollector));

  // Obtain facet results and print them
  AdaptiveFacetsAccumulator accumulator =
      new AdaptiveFacetsAccumulator(facetSearchParams, indexReader, taxo);
  List<FacetResult> res = accumulator.accumulate(docIdsCollector.getScoredDocIDs());

  int i = 0;
  for (FacetResult facetResult : res) {
    ExampleUtils.log("Res " + (i++) + ": " + facetResult);
  }

  // we're done, close the index reader and the taxonomy.
  indexReader.close();
  taxo.close();

  return res;
}
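// A minimal, hedged sketch of calling the example above; RAMDirectory and the indexing
// step are illustrative (SimpleUtils / ExampleUtils belong to the Lucene facet demo and
// are not shown here).
Directory indexDir = new RAMDirectory();
Directory taxoDir = new RAMDirectory();
// ... index documents and their categories into indexDir / taxoDir first ...
List<FacetResult> results = searchWithFacets(indexDir, taxoDir);
for (FacetResult result : results) {
  System.out.println(result);
}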
@Override
public void preProcess(SearchContext context) {
  // add specific facets to nested queries...
  if (context.nestedQueries() != null) {
    for (Map.Entry<String, BlockJoinQuery> entry : context.nestedQueries().entrySet()) {
      List<Collector> collectors = context.searcher().removeCollectors(entry.getKey());
      if (collectors != null && !collectors.isEmpty()) {
        if (collectors.size() == 1) {
          entry.getValue().setCollector(collectors.get(0));
        } else {
          entry
              .getValue()
              .setCollector(
                  MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
        }
      }
    }
  }
}
@Override
public void search(Query query, Collector collector) throws IOException {
  // Wrap the caller's collector with various wrappers, e.g. those used to siphon
  // matches off for aggregation or to impose a time limit on collection.
  final boolean timeoutSet = searchContext.timeoutInMillis() != -1;
  final boolean terminateAfterSet =
      searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;

  if (timeoutSet) {
    // TODO: change to use our own counter that uses the scheduler in ThreadPool
    // throws TimeLimitingCollector.TimeExceededException when the timeout has been reached
    collector =
        Lucene.wrapTimeLimitingCollector(
            collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis());
  }
  if (terminateAfterSet) {
    // throws Lucene.EarlyTerminationException when the given count is reached
    collector =
        Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
  }
  if (currentState == Stage.MAIN_QUERY) {
    if (searchContext.parsedPostFilter() != null) {
      // this is only applied to the actual search collector and not to any scoped
      // collectors; it is applied to the main collector only, since that is the only
      // place the post filter should take effect
      final Weight filterWeight =
          createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
      collector = new FilteredCollector(collector, filterWeight);
    }
    if (queryCollectors != null && !queryCollectors.isEmpty()) {
      ArrayList<Collector> allCollectors = new ArrayList<>(queryCollectors.values());
      allCollectors.add(collector);
      collector = MultiCollector.wrap(allCollectors);
    }

    // apply the minimum score after the multi collector so we filter aggs as well
    if (searchContext.minimumScore() != null) {
      collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
    }
  }
  super.search(query, collector);
}
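// The timeout wrapping above goes through an Elasticsearch helper
// (Lucene.wrapTimeLimitingCollector). The underlying idea can be sketched with stock
// Lucene classes (org.apache.lucene.search.TimeLimitingCollector and
// org.apache.lucene.util.Counter); this is an illustrative sketch only, not the ES
// helper, and the clock would normally be advanced by TimeLimitingCollector.TimerThread.
Counter clock = Counter.newCounter(true);
long ticksAllowed = 1000; // hypothetical budget, measured in clock ticks
Collector limited = new TimeLimitingCollector(collector, clock, ticksAllowed);
// Collecting through "limited" throws TimeLimitingCollector.TimeExceededException once
// the clock advances past the allowed budget.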
@Override
public void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
    throws IOException {
  final boolean timeoutSet = searchContext.timeoutInMillis() != -1;
  final boolean terminateAfterSet =
      searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;

  if (timeoutSet) {
    // TODO: change to use our own counter that uses the scheduler in ThreadPool
    // throws TimeLimitingCollector.TimeExceededException when the timeout has been reached
    collector =
        Lucene.wrapTimeLimitingCollector(
            collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis());
  }
  if (terminateAfterSet) {
    // throws Lucene.EarlyTerminationException when the given count is reached
    collector =
        Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
  }
  if (currentState == Stage.MAIN_QUERY) {
    if (searchContext.parsedPostFilter() != null) {
      // this is only applied to the actual search collector and not to any scoped
      // collectors; it is applied to the main collector only, since that is the only
      // place the post filter should take effect
      collector = new FilteredCollector(collector, searchContext.parsedPostFilter().filter());
    }
    if (queryCollectors != null && !queryCollectors.isEmpty()) {
      ArrayList<Collector> allCollectors = new ArrayList<>(queryCollectors.values());
      allCollectors.add(collector);
      collector = MultiCollector.wrap(allCollectors);
    }

    // apply the minimum score after the multi collector so we filter aggs as well
    if (searchContext.minimumScore() != null) {
      collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
    }
  }

  // we only compute the doc id set once, since within a context we always execute
  // the same query...
  try {
    if (timeoutSet || terminateAfterSet) {
      try {
        super.search(leaves, weight, collector);
      } catch (TimeLimitingCollector.TimeExceededException e) {
        assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
        searchContext.queryResult().searchTimedOut(true);
      } catch (Lucene.EarlyTerminationException e) {
        assert terminateAfterSet
            : "EarlyTerminationException thrown even though terminateAfter wasn't set";
        searchContext.queryResult().terminatedEarly(true);
      }
      if (terminateAfterSet && searchContext.queryResult().terminatedEarly() == null) {
        searchContext.queryResult().terminatedEarly(false);
      }
    } else {
      super.search(leaves, weight, collector);
    }
  } finally {
    searchContext.clearReleasables(Lifetime.COLLECTION);
  }
}
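// Both overrides above rely on MultiCollector.wrap(...) to fan a single search pass out
// to several collectors. A minimal sketch of that pattern with stock Lucene collectors
// (Lucene 5.x-style TopScoreDocCollector.create signature; searcher and query are assumed
// to already exist):
TopScoreDocCollector topDocs = TopScoreDocCollector.create(10);
TotalHitCountCollector totalHits = new TotalHitCountCollector();
searcher.search(query, MultiCollector.wrap(topDocs, totalHits));
// one pass over the index, two results
TopDocs hits = topDocs.topDocs();
int count = totalHits.getTotalHits();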
@Test
public void testRandom() throws Exception {
  Directory directory = newDirectory();
  final Random r = random();
  final IndexWriterConfig iwc =
      LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(
              scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
  RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
  int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
  String[] childValues = new String[numUniqueChildValues];
  for (int i = 0; i < numUniqueChildValues; i++) {
    childValues[i] = Integer.toString(i);
  }

  IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();

  int childDocId = 0;
  int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
  ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
      new ObjectObjectOpenHashMap<>();
  for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
    boolean markParentAsDeleted = rarely();
    boolean filterMe = rarely();
    String parent = Integer.toString(parentDocId);
    Document document = new Document();
    document.add(
        new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
    document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
    if (markParentAsDeleted) {
      filteredOrDeletedDocs.add(parentDocId);
      document.add(new StringField("delete", "me", Field.Store.NO));
    }
    if (filterMe) {
      filteredOrDeletedDocs.add(parentDocId);
      document.add(new StringField("filter", "me", Field.Store.NO));
    }
    indexWriter.addDocument(document);

    int numChildDocs = scaledRandomIntBetween(0, 100);
    for (int i = 0; i < numChildDocs; i++) {
      boolean markChildAsDeleted = rarely();
      String childValue = childValues[random().nextInt(childValues.length)];

      document = new Document();
      document.add(
          new StringField(
              UidFieldMapper.NAME,
              Uid.createUid("child", Integer.toString(childDocId++)),
              Field.Store.NO));
      document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
      document.add(
          new StringField(
              ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
      document.add(new StringField("field1", childValue, Field.Store.NO));
      if (markChildAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      if (!markChildAsDeleted) {
        NavigableMap<String, FloatArrayList> parentIdToChildScores;
        if (childValueToParentIds.containsKey(childValue)) {
          parentIdToChildScores = childValueToParentIds.lget();
        } else {
          childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>());
        }
        if (!markParentAsDeleted && !filterMe) {
          FloatArrayList childScores = parentIdToChildScores.get(parent);
          if (childScores == null) {
            parentIdToChildScores.put(parent, childScores = new FloatArrayList());
          }
          childScores.add(1f);
        }
      }
    }
  }

  // Delete docs that are marked to be deleted.
  indexWriter.deleteDocuments(new Term("delete", "me"));
  indexWriter.commit();

  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(indexReader);
  Engine.Searcher engineSearcher =
      new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
  ((TestSearchContext) SearchContext.current())
      .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

  int max = numUniqueChildValues / 4;
  for (int i = 0; i < max; i++) {
    // Simulate a parent update
    if (random().nextBoolean()) {
      final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
      int numberOfUpdates =
          RandomInts.randomIntBetween(
              random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5));
      for (int j = 0; j < numberOfUpdates; j++) {
        int parentId;
        do {
          parentId = random().nextInt(numParentDocs);
        } while (filteredOrDeletedDocs.contains(parentId));

        String parentUid = Uid.createUid("parent", Integer.toString(parentId));
        indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

        Document document = new Document();
        document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
        document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
        indexWriter.addDocument(document);
      }

      indexReader.close();
      indexReader = DirectoryReader.open(indexWriter.w, true);
      searcher = new IndexSearcher(indexReader);
      engineSearcher =
          new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
      ((TestSearchContext) SearchContext.current())
          .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
    }

    String childValue = childValues[random().nextInt(numUniqueChildValues)];
    int shortCircuitParentDocSet = random().nextInt(numParentDocs);
    ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
    // leave min/max set to 0 half the time
    int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110);
    int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110);

    QueryBuilder queryBuilder =
        hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue)))
            .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
            .minChildren(minChildren)
            .maxChildren(maxChildren)
            .setShortCircuitCutoff(shortCircuitParentDocSet);
    // Using a filtered query will invoke / test Scorer#advance(..) and also ensures the
    // Weight#scorer does not get live docs as acceptedDocs
    queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
    Query query = parseQuery(queryBuilder);
    BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
    int numHits = 1 + random().nextInt(25);
    TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits);
    searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
    FixedBitSet actualResult = collector.getResult();

    FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
    TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits);
    if (childValueToParentIds.containsKey(childValue)) {
      LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
      final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()];
      Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
      if (terms != null) {
        NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
          int count = entry.getValue().elementsCount;
          if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
              scores[docsEnum.docID()] = new FloatArrayList(entry.getValue());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      MockScorer mockScorer = new MockScorer(scoreType);
      final LeafCollector leafCollector =
          expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
      leafCollector.setScorer(mockScorer);
      for (int doc = expectedResult.nextSetBit(0);
          doc < slowLeafReader.maxDoc();
          doc =
              doc + 1 >= expectedResult.length()
                  ? DocIdSetIterator.NO_MORE_DOCS
                  : expectedResult.nextSetBit(doc + 1)) {
        mockScorer.scores = scores[doc];
        leafCollector.collect(doc);
      }
    }

    assertBitSet(actualResult, expectedResult, searcher);
    assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
  }

  indexWriter.close();
  indexReader.close();
  directory.close();
}
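// BitSetCollector above is a test helper that is not shown in this snippet. A hypothetical
// sketch of what such a collector might look like against the Lucene 5.x SimpleCollector
// API (the name and details are assumptions, not the actual helper):
final class BitSetCollector extends SimpleCollector {

  private final FixedBitSet result;
  private int docBase;

  BitSetCollector(int topLevelMaxDoc) {
    this.result = new FixedBitSet(topLevelMaxDoc);
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    this.docBase = context.docBase;
  }

  @Override
  public void collect(int doc) throws IOException {
    result.set(docBase + doc); // record the top-level doc id of every hit
  }

  @Override
  public boolean needsScores() {
    return false; // only membership matters, not scores
  }

  FixedBitSet getResult() {
    return result;
  }
}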
@Override
public void execute(SearchContext context) throws ElasticSearchException {
  if (context.facets() == null || context.facets().facetCollectors() == null) {
    return;
  }
  if (context.queryResult().facets() != null) {
    // no need to compute the facets twice; they should be computed on a per-context basis
    return;
  }

  // optimize global facet execution based on filters (don't iterate over all docs), and
  // check whether we have special facets that can be optimized for global execution; if
  // so, do it
  List<Collector> collectors =
      context.searcher().removeCollectors(ContextIndexSearcher.Scopes.GLOBAL);

  if (collectors != null && !collectors.isEmpty()) {
    Map<Filter, List<Collector>> filtersByCollector = Maps.newHashMap();
    for (Collector collector : collectors) {
      if (collector instanceof OptimizeGlobalFacetCollector) {
        try {
          ((OptimizeGlobalFacetCollector) collector).optimizedGlobalExecution(context);
        } catch (IOException e) {
          throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
        }
      } else {
        Filter filter = Queries.MATCH_ALL_FILTER;
        if (collector instanceof AbstractFacetCollector) {
          AbstractFacetCollector facetCollector = (AbstractFacetCollector) collector;
          if (facetCollector.getFilter() != null) {
            // we can clear the filter, since we are going to iterate over it anyhow,
            // so there is no need to double-check it...
            filter = facetCollector.getAndClearFilter();
          }
        }
        List<Collector> list = filtersByCollector.get(filter);
        if (list == null) {
          list = new ArrayList<Collector>();
          filtersByCollector.put(filter, list);
        }
        list.add(collector);
      }
    }
    // now, go and execute the filter->collectors ones
    for (Map.Entry<Filter, List<Collector>> entry : filtersByCollector.entrySet()) {
      Filter filter = entry.getKey();
      Query query = new DeletionAwareConstantScoreQuery(filter);
      Filter searchFilter = context.mapperService().searchFilter(context.types());
      if (searchFilter != null) {
        query = new FilteredQuery(query, context.filterCache().cache(searchFilter));
      }
      try {
        context
            .searcher()
            .search(
                query,
                MultiCollector.wrap(
                    entry.getValue().toArray(new Collector[entry.getValue().size()])));
      } catch (IOException e) {
        throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
      }
    }
  }

  SearchContextFacets contextFacets = context.facets();
  List<Facet> facets = Lists.newArrayListWithCapacity(2);
  if (contextFacets.facetCollectors() != null) {
    for (FacetCollector facetCollector : contextFacets.facetCollectors()) {
      facets.add(facetCollector.facet());
    }
  }
  context.queryResult().facets(new InternalFacets(facets));
}
@Test
public void testRandom() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600);
  String[] childValues = new String[numUniqueChildValues];
  for (int i = 0; i < numUniqueChildValues; i++) {
    childValues[i] = Integer.toString(i);
  }

  int childDocId = 0;
  int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
  ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
      new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>();
  for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
    boolean markParentAsDeleted = rarely();
    String parent = Integer.toString(parentDocId);
    Document document = new Document();
    document.add(
        new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
    document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
    if (markParentAsDeleted) {
      document.add(new StringField("delete", "me", Field.Store.NO));
    }
    indexWriter.addDocument(document);

    int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
    for (int i = 0; i < numChildDocs; i++) {
      boolean markChildAsDeleted = rarely();
      String childValue = childValues[random().nextInt(childValues.length)];

      document = new Document();
      document.add(
          new StringField(
              UidFieldMapper.NAME,
              Uid.createUid("child", Integer.toString(childDocId++)),
              Field.Store.NO));
      document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
      document.add(
          new StringField(
              ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
      document.add(new StringField("field1", childValue, Field.Store.NO));
      if (markChildAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      if (!markChildAsDeleted) {
        NavigableMap<String, FloatArrayList> parentIdToChildScores;
        if (childValueToParentIds.containsKey(childValue)) {
          parentIdToChildScores = childValueToParentIds.lget();
        } else {
          childValueToParentIds.put(
              childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>());
        }
        if (!markParentAsDeleted) {
          FloatArrayList childScores = parentIdToChildScores.get(parent);
          if (childScores == null) {
            parentIdToChildScores.put(parent, childScores = new FloatArrayList());
          }
          childScores.add(1f);
        }
      }
    }
  }

  // Delete docs that are marked to be deleted.
  indexWriter.deleteDocuments(new Term("delete", "me"));
  indexWriter.close();

  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher searcher = new IndexSearcher(indexReader);
  Engine.Searcher engineSearcher =
      new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
  ((TestSearchContext) SearchContext.current())
      .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

  TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
  for (String childValue : childValues) {
    Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue)));
    int shortCircuitParentDocSet = random().nextInt(numParentDocs);
    ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
    Query query =
        new ChildrenQuery(
            "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet);
    BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
    int numHits = 1 + random().nextInt(25);
    TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false);
    searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
    FixedBitSet actualResult = collector.getResult();

    FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
    MockScorer mockScorer = new MockScorer(scoreType);
    TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
    expectedTopDocsCollector.setScorer(mockScorer);
    if (childValueToParentIds.containsKey(childValue)) {
      AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
      Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
      if (terms != null) {
        NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
          TermsEnum.SeekStatus seekStatus =
              termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
          if (seekStatus == TermsEnum.SeekStatus.FOUND) {
            docsEnum =
                termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
            expectedResult.set(docsEnum.nextDoc());
            mockScorer.scores = entry.getValue();
            expectedTopDocsCollector.collect(docsEnum.docID());
          } else if (seekStatus == TermsEnum.SeekStatus.END) {
            break;
          }
        }
      }
    }

    assertBitSet(actualResult, expectedResult, searcher);
    assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
  }

  indexReader.close();
  directory.close();
}
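// Hypothetical sketch of the assertBitSet helper used by both tests (the real test
// utility is not shown in these snippets): report the first doc id on which the actual
// and expected bitsets disagree.
static void assertBitSet(FixedBitSet actual, FixedBitSet expected, IndexSearcher searcher)
    throws IOException {
  for (int docId = 0; docId < actual.length(); docId++) {
    if (actual.get(docId) != expected.get(docId)) {
      fail("expected [" + expected.get(docId) + "] but got [" + actual.get(docId)
          + "] for doc " + docId + ": " + searcher.doc(docId));
    }
  }
}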