/** Replay the wrapped collector, but only on a selection of buckets. */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
  if (!finished) {
    throw new IllegalStateException(
        "Cannot replay yet, collection is not finished: postCollect() has not been called");
  }
  if (this.selectedBuckets != null) {
    throw new IllegalStateException("Already been replayed");
  }

  // Rebase the selected bucket ordinals onto dense ordinals 0..n-1.
  final LongHash hash = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
  for (long bucket : selectedBuckets) {
    hash.add(bucket);
  }
  this.selectedBuckets = hash;

  collector.preCollection();
  boolean needsScores = collector.needsScores();
  Weight weight = null;
  if (needsScores) {
    weight =
        aggContext
            .searchContext()
            .searcher()
            .createNormalizedWeight(aggContext.searchContext().query(), true);
  }
  for (Entry entry : entries) {
    final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
    DocIdSetIterator docIt = null;
    if (needsScores && entry.docDeltas.size() > 0) {
      Scorer scorer = weight.scorer(entry.context);
      // We don't need to check if the scorer is null
      // since we are sure that there are documents to replay (entry.docDeltas is not empty).
      docIt = scorer.iterator();
      leafCollector.setScorer(scorer);
    }
    final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
    final PackedLongValues.Iterator buckets = entry.buckets.iterator();
    int doc = 0;
    for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
      doc += docDeltaIterator.next();
      final long bucket = buckets.next();
      final long rebasedBucket = hash.find(bucket);
      if (rebasedBucket != -1) {
        if (needsScores) {
          if (docIt.docID() < doc) {
            docIt.advance(doc);
          }
          // aggregations should only be replayed on matching documents
          assert docIt.docID() == doc;
        }
        leafCollector.collect(doc, rebasedBucket);
      }
    }
  }
  collector.postCollection();
}
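A minimal sketch of the rebasing step above, assuming only the LongHash behaviour this method relies on (and the same LongHash/BigArrays imports as the surrounding class): add() assigns dense ordinals to the selected buckets in insertion order, and find() returns -1 for any bucket that was not selected, which is why unselected document/bucket pairs are simply skipped during replay. The bucket values below are made up for illustration.

LongHash hash = new LongHash(3, BigArrays.NON_RECYCLING_INSTANCE);
hash.add(42L);    // rebased to ordinal 0
hash.add(7L);     // rebased to ordinal 1
hash.add(1337L);  // rebased to ordinal 2
assert hash.find(7L) == 1;    // selected bucket: replayed under its dense ordinal
assert hash.find(99L) == -1;  // never selected: skipped by the replay loop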
@Override
public boolean needsScores() {
  if (collector == null) {
    // Scoring requirements are unknown until the deferred collectors have been set.
    throw new IllegalStateException();
  }
  return collector.needsScores();
}
public void testResetRootDocId() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc);

  List<Document> documents = new ArrayList<>();

  // 1 segment with 1 root document and 3 nested sub docs
  Document document = new Document();
  document.add(
      new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(
      new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(
      new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(
      new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(
      new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(
      new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  indexWriter.commit();

  documents.clear();
  // 1 segment with:
  // 1 document, with 1 nested subdoc
  document = new Document();
  document.add(
      new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(
      new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  documents.clear();
  // and 1 document, with 1 nested subdoc
  document = new Document();
  document.add(
      new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
  document.add(
      new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  document = new Document();
  document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
  document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
  documents.add(document);
  indexWriter.addDocuments(documents);
  indexWriter.commit();
  indexWriter.close();

  IndexService indexService = createIndex("test");
  DirectoryReader directoryReader = DirectoryReader.open(directory);
  directoryReader =
      ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
  IndexSearcher searcher = new IndexSearcher(directoryReader);

  indexService
      .mapperService()
      .merge(
          "test",
          new CompressedXContent(
              PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested")
                  .string()),
          MapperService.MergeReason.MAPPING_UPDATE,
          false);
  SearchContext searchContext = createSearchContext(indexService);
  AggregationContext context = new AggregationContext(searchContext);

  AggregatorFactories.Builder builder = AggregatorFactories.builder();
  NestedAggregatorBuilder factory = new NestedAggregatorBuilder("test", "nested_field");
  builder.addAggregator(factory);
  AggregatorFactories factories = builder.build(context, null);
  searchContext.aggregations(new SearchContextAggregations(factories));
  Aggregator[] aggs = factories.createTopLevelAggregators();
  BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
  collector.preCollection();
  // A regular search always excludes nested docs, so we use Queries.newNonNestedFilter() here
  // (otherwise MatchAllDocsQuery would be sufficient).
  // We exclude the root doc with uid type#2; this triggers the bug if we don't reset the root
  // doc when we process a new segment, because root doc type#3 and root doc type#1 have the
  // same segment docid.
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(Queries.newNonNestedFilter(), Occur.MUST);
  bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
  searcher.search(new ConstantScoreQuery(bq.build()), collector);
  collector.postCollection();

  Nested nested = (Nested) aggs[0].buildAggregation(0);
  // The bug manifests as 6 docs being returned: when currentRootDoc isn't reset, the child docs
  // from the first segment are emitted as hits again.
  assertThat(nested.getDocCount(), equalTo(4L));

  directoryReader.close();
  directory.close();
}
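For reference, the per-segment doc ids implied by the writes in the test above, assuming the two blocks of the second commit land in the same segment as the comments intend (nested sub docs always precede their root doc within a block):

// segment 0: docs 0..2 = nested sub docs of type#1, doc 3 = root doc type#1
// segment 1: doc 0 = nested sub doc of type#2, doc 1 = root doc type#2,
//            doc 2 = nested sub doc of type#3, doc 3 = root doc type#3
// Root docs type#1 and type#3 therefore share segment-local doc id 3, which is why a
// missing currentRootDoc reset across segments produces the extra child hits the
// assertion guards against.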
/** Set the deferred collectors. */
@Override
public void setDeferredCollector(Iterable<BucketCollector> deferredCollectors) {
  this.collector = BucketCollector.wrap(deferredCollectors);
}
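Taken together, setDeferredCollector, needsScores and prepareSelectedBuckets imply a two-pass lifecycle: buffer doc ids and bucket ordinals during the first pass, finish collection, then replay only the buckets that survived pruning. The sketch below is a hedged illustration of that call order; the variable names, the bucket ordinals and the direct searcher.search call are illustrative only (in the aggregation framework the deferring collector is normally driven through its leaf collectors by the parent aggregator rather than passed to the searcher directly).

deferring.setDeferredCollector(Collections.singletonList(childCollector));
deferring.preCollection();
searcher.search(query, deferring);         // first pass: buffer doc deltas and bucket ords per segment
deferring.postCollection();                // marks collection as finished
deferring.prepareSelectedBuckets(0L, 3L);  // second pass: replay only the surviving buckets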