public DocumentFilteredLeafIndexReader(
    LeafReaderContext context, Filter preserveFilter, boolean negateFilter) throws IOException {
  super(context.reader());
  final int maxDoc = in.maxDoc();
  final FixedBitSet bits = new FixedBitSet(maxDoc);
  // ignore livedocs here, as we filter them later:
  final DocIdSet docs = preserveFilter.getDocIdSet(context, null);
  if (docs != null) {
    final DocIdSetIterator it = docs.iterator();
    if (it != null) {
      bits.or(it);
    }
  }
  if (negateFilter) {
    bits.flip(0, maxDoc);
  }
  if (in.hasDeletions()) {
    final Bits oldLiveDocs = in.getLiveDocs();
    assert oldLiveDocs != null;
    final DocIdSetIterator it = new BitSetIterator(bits, 0L); // the cost is not useful here
    for (int i = it.nextDoc(); i < maxDoc; i = it.nextDoc()) {
      if (!oldLiveDocs.get(i)) {
        // we can safely modify the current bit, as the iterator already stepped over it:
        bits.clear(i);
      }
    }
  }
  this.liveDocs = bits;
  this.numDocs = bits.cardinality();
}
@Override
public Scorer filteredScorer(LeafReaderContext context, Weight weight, DocIdSet docIdSet)
    throws IOException {
  // CHANGE: If threshold is 0, always pass down the accept docs, don't pay the price of
  // calling nextDoc even...
  final Bits filterAcceptDocs = docIdSet.bits();
  if (threshold == 0) {
    if (filterAcceptDocs != null) {
      return weight.scorer(context, filterAcceptDocs);
    } else {
      return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(
          context, weight, docIdSet);
    }
  }
  // CHANGE: handle the "default" value
  if (threshold == -1) {
    // Default value: don't iterate, only apply the filter after the query if it's not a
    // "fast" docIdSet.
    // TODO: is there a way we could avoid creating an iterator here?
    if (filterAcceptDocs != null && DocIdSets.isBroken(docIdSet.iterator())) {
      return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(
          context, weight, docIdSet);
    }
  }
  return super.filteredScorer(context, weight, docIdSet);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
  if (used.compareAndSet(false, true)) {
    policy.onUse(getQuery());
  }
  DocIdSet docIdSet = get(in.getQuery(), context);
  if (docIdSet == null) {
    if (shouldCache(context)) {
      docIdSet = cache(context);
      putIfAbsent(in.getQuery(), context, docIdSet);
    } else {
      return in.bulkScorer(context);
    }
  }
  assert docIdSet != null;
  if (docIdSet == DocIdSet.EMPTY) {
    return null;
  }
  final DocIdSetIterator disi = docIdSet.iterator();
  if (disi == null) {
    return null;
  }
  return new DefaultBulkScorer(new ConstantScoreScorer(this, 0f, disi));
}
// Delete by query
private static long applyQueryDeletes(
    Iterable<QueryAndLimit> queriesIter, ReadersAndUpdates rld, final SegmentReader reader)
    throws IOException {
  long delCount = 0;
  final AtomicReaderContext readerContext = reader.getContext();
  boolean any = false;
  for (QueryAndLimit ent : queriesIter) {
    Query query = ent.query;
    int limit = ent.limit;
    final DocIdSet docs =
        new QueryWrapperFilter(query).getDocIdSet(readerContext, reader.getLiveDocs());
    if (docs != null) {
      final DocIdSetIterator it = docs.iterator();
      if (it != null) {
        while (true) {
          int doc = it.nextDoc();
          if (doc >= limit) {
            break;
          }
          if (!any) {
            rld.initWritableLiveDocs();
            any = true;
          }
          if (rld.delete(doc)) {
            delCount++;
          }
        }
      }
    }
  }
  return delCount;
}
@Override
public DocIdSetIterator iterator() throws IOException {
  // We try to be smart here: if we can iterate through doc sets quickly, prefer to iterate
  // over them as much as possible before falling back to "bits" based ones to check.
  List<DocIdSet> iterators = new ArrayList<DocIdSet>(sets.length);
  List<Bits> bits = new ArrayList<Bits>(sets.length);
  for (DocIdSet set : sets) {
    if (DocIdSets.isFastIterator(set)) {
      iterators.add(set);
    } else {
      Bits bit = set.bits();
      if (bit != null) {
        bits.add(bit);
      } else {
        iterators.add(set);
      }
    }
  }
  if (bits.isEmpty()) {
    return new IteratorBasedIterator(iterators.toArray(new DocIdSet[iterators.size()]));
  }
  if (iterators.isEmpty()) {
    return new BitsDocIdSetIterator(new AndBits(bits.toArray(new Bits[bits.size()])));
  }
  // Combination of both: first iterate over the "fast" ones, then check against the more
  // expensive ones.
  return new BitsDocIdSetIterator.FilteredIterator(
      new IteratorBasedIterator(iterators.toArray(new DocIdSet[iterators.size()])),
      new AndBits(bits.toArray(new Bits[bits.size()])));
}
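// The snippet above relies on an AndBits helper that is not shown in this excerpt. The
// following is a minimal, hedged sketch of what such a helper could look like, assuming it
// reports a document as matching only when every wrapped Bits instance does. The class name
// and constructor shape are inferred from the call sites above, not taken from the source;
// it needs org.apache.lucene.util.Bits on the classpath.
static final class AndBits implements Bits {
  private final Bits[] bits;

  AndBits(Bits[] bits) {
    this.bits = bits;
  }

  @Override
  public boolean get(int index) {
    // A doc is accepted only if all underlying bit sets accept it.
    for (Bits bit : bits) {
      if (!bit.get(index)) {
        return false;
      }
    }
    return true;
  }

  @Override
  public int length() {
    return bits[0].length();
  }
}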
/**
 * Returns the best nested {@link ObjectMapper} instance that is in the scope of the specified
 * nested docId.
 */
public ObjectMapper findNestedObjectMapper(
    int nestedDocId, SearchContext sc, LeafReaderContext context) throws IOException {
  ObjectMapper nestedObjectMapper = null;
  for (ObjectMapper objectMapper : objectMappers().values()) {
    if (!objectMapper.nested().isNested()) {
      continue;
    }
    Filter filter = objectMapper.nestedTypeFilter();
    if (filter == null) {
      continue;
    }
    // We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and
    // therefore is guaranteed to be a live doc.
    DocIdSet nestedTypeSet = filter.getDocIdSet(context, null);
    if (nestedTypeSet == null) {
      continue;
    }
    DocIdSetIterator iterator = nestedTypeSet.iterator();
    if (iterator == null) {
      continue;
    }
    if (iterator.advance(nestedDocId) == nestedDocId) {
      if (nestedObjectMapper == null) {
        nestedObjectMapper = objectMapper;
      } else if (nestedObjectMapper.fullPath().length() < objectMapper.fullPath().length()) {
        nestedObjectMapper = objectMapper;
      }
    }
  }
  return nestedObjectMapper;
}
@Override
public Scorer scorer(
    AtomicReaderContext context,
    boolean scoreDocsInOrder,
    boolean topScorer,
    final Bits acceptDocs)
    throws IOException {
  final DocIdSetIterator disi;
  if (filter != null) {
    assert query == null;
    final DocIdSet dis = filter.getDocIdSet(context, acceptDocs);
    if (dis == null) {
      return null;
    }
    disi = dis.iterator();
  } else {
    assert query != null && innerWeight != null;
    disi = innerWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
  }
  if (disi == null) {
    return null;
  }
  return new ConstantScorer(disi, this, queryWeight);
}
void remove(Query query) {
  assert query instanceof BoostQuery == false;
  assert query instanceof ConstantScoreQuery == false;
  DocIdSet removed = cache.remove(query);
  if (removed != null) {
    onDocIdSetEviction(HASHTABLE_RAM_BYTES_PER_ENTRY + removed.ramBytesUsed());
  }
}
@Test
public void testStream_docIdSetIterator_empty() throws Exception {
  final Builder disBuilder = new Builder(10);
  final DocIdSet dis = disBuilder.build();
  Assert.assertEquals(
      "Too many document ids streamed.", 0L, StreamUtils.stream(dis.iterator()).count());
}
@Override
public boolean isCacheable() {
  for (DocIdSet set : sets) {
    if (!set.isCacheable()) {
      return false;
    }
  }
  return true;
}
private RoaringDocIdSet(DocIdSet[] docIdSets, int cardinality) {
  this.docIdSets = docIdSets;
  long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(docIdSets);
  for (DocIdSet set : this.docIdSets) {
    if (set != null) {
      ramBytesUsed += set.ramBytesUsed();
    }
  }
  this.ramBytesUsed = ramBytesUsed;
  this.cardinality = cardinality;
}
private void tstFilterCard(String mes, int expected, Filter filt) throws Exception {
  final DocIdSet docIdSet = filt.getDocIdSet(reader.getContext(), reader.getLiveDocs());
  int actual = 0;
  if (docIdSet != null) {
    DocIdSetIterator disi = docIdSet.iterator();
    while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      actual++;
    }
  }
  assertEquals(mes, expected, actual);
}
@Override
public Scorer scorer(
    AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs)
    throws IOException {
  DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
  if (childrenDocSet == null || childrenDocSet == DocIdSet.EMPTY_DOCIDSET) {
    return null;
  }
  IdReaderTypeCache idTypeCache =
      searchContext.idCache().reader(context.reader()).type(parentType);
  return new ChildScorer(this, uidToScore, childrenDocSet.iterator(), idTypeCache);
}
private static void assertDocIdSetCacheable(
    IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException {
  assertTrue(reader.getContext() instanceof AtomicReaderContext);
  AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
  final CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
  final DocIdSet originalSet = filter.getDocIdSet(context, context.reader().getLiveDocs());
  final DocIdSet cachedSet = cacher.getDocIdSet(context, context.reader().getLiveDocs());
  if (originalSet == null) {
    assertNull(cachedSet);
  }
  if (cachedSet == null) {
    assertTrue(originalSet == null || originalSet.iterator() == null);
  } else {
    assertTrue(cachedSet.isCacheable());
    assertEquals(shouldCacheable, originalSet.isCacheable());
    // System.out.println("Original: " + originalSet.getClass().getName()
    //     + " -- cached: " + cachedSet.getClass().getName());
    if (originalSet.isCacheable()) {
      assertEquals(
          "Cached DocIdSet must be of the same class as the uncached one, if cacheable",
          originalSet.getClass(),
          cachedSet.getClass());
    } else {
      assertTrue(
          "Cached DocIdSet must be a FixedBitSet if the original one was not cacheable",
          cachedSet instanceof FixedBitSet || cachedSet == null);
    }
  }
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet docIdSet =
      filter instanceof SolrFilter
          ? ((SolrFilter) filter).getDocIdSet(this.context, context, null)
          : filter.getDocIdSet(context, null);
  if (docIdSet == null) {
    return null;
  }
  DocIdSetIterator iterator = docIdSet.iterator();
  if (iterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), iterator);
}
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
    throws IOException {
  final DocIdSetIterator disi;
  if (filter != null) {
    assert query == null;
    final DocIdSet dis = filter.getDocIdSet(reader);
    if (dis == null) {
      return null;
    }
    disi = dis.iterator();
  } else {
    assert query != null && innerWeight != null;
    disi = innerWeight.scorer(reader, scoreDocsInOrder, topScorer);
  }
  if (disi == null) {
    return null;
  }
  return new ConstantScorer(similarity, disi, this);
}
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
  if (d1 == null) {
    if (d2 != null) {
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
    }
  } else if (d2 == null) {
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
  } else {
    DocIdSetIterator i1 = d1.iterator();
    DocIdSetIterator i2 = d2.iterator();
    for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
      assertEquals(doc, i2.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
  }
}
public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
  super(similarity);
  this.reader = reader;
  theScore = w.getValue();
  DocIdSet docIdSet = filter.getDocIdSet(reader);
  if (docIdSet == null) {
    _innerIter = DocIdSet.EMPTY_DOCIDSET.iterator();
  } else {
    DocIdSetIterator iter = docIdSet.iterator();
    if (iter == null) {
      _innerIter = DocIdSet.EMPTY_DOCIDSET.iterator();
    } else {
      _innerIter = iter;
    }
  }
}
// pkg-private for testing
synchronized void assertConsistent() {
  if (requiresEviction()) {
    throw new AssertionError(
        "requires evictions: size="
            + mostRecentlyUsedQueries.size()
            + ", maxSize="
            + maxSize
            + ", ramBytesUsed="
            + ramBytesUsed()
            + ", maxRamBytesUsed="
            + maxRamBytesUsed);
  }
  for (LeafCache leafCache : cache.values()) {
    Set<Query> keys = Collections.newSetFromMap(new IdentityHashMap<>());
    keys.addAll(leafCache.cache.keySet());
    keys.removeAll(mostRecentlyUsedQueries);
    if (!keys.isEmpty()) {
      throw new AssertionError(
          "One leaf cache contains more keys than the top-level cache: " + keys);
    }
  }
  long recomputedRamBytesUsed =
      HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size()
          + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size();
  for (Query query : mostRecentlyUsedQueries) {
    recomputedRamBytesUsed += ramBytesUsed(query);
  }
  for (LeafCache leafCache : cache.values()) {
    recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size();
    for (DocIdSet set : leafCache.cache.values()) {
      recomputedRamBytesUsed += set.ramBytesUsed();
    }
  }
  if (recomputedRamBytesUsed != ramBytesUsed) {
    throw new AssertionError(
        "ramBytesUsed mismatch : " + ramBytesUsed + " != " + recomputedRamBytesUsed);
  }
  long recomputedCacheSize = 0;
  for (LeafCache leafCache : cache.values()) {
    recomputedCacheSize += leafCache.cache.size();
  }
  if (recomputedCacheSize != getCacheSize()) {
    throw new AssertionError(
        "cacheSize mismatch : " + getCacheSize() + " != " + recomputedCacheSize);
  }
}
public SpansFiltered(Spans spans, DocIdSet filterDocs) throws IOException {
  this.spans = BLSpansWrapper.optWrap(spans);
  docIdSetIter = filterDocs.iterator();
  more = false;
  if (docIdSetIter != null) {
    more = (docIdSetIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  }
}
void putIfAbsent(Query query, DocIdSet set) {
  assert query instanceof BoostQuery == false;
  assert query instanceof ConstantScoreQuery == false;
  if (cache.putIfAbsent(query, set) == null) {
    // the set was actually put
    onDocIdSetCache(HASHTABLE_RAM_BYTES_PER_ENTRY + set.ramBytesUsed());
  }
}
@Override
public void compute(OpenBitSetDISI accumulator, DocIdSet bitset) throws IOException {
  if (bitset instanceof OpenBitSet) {
    accumulator.and((OpenBitSet) bitset);
  } else if (bitset instanceof SortedVIntList) {
    accumulator.inPlaceAnd(bitset.iterator());
  } else {
    throw new IllegalArgumentException("Not supported: " + bitset);
  }
}
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
  DocIdSetIterator ia = a.iterator();
  DocIdSetIterator ib = b.iterator();
  // test for next() equivalence
  for (; ; ) {
    int da = ia.nextDoc();
    int db = ib.nextDoc();
    assertEquals(da, db);
    assertEquals(ia.docID(), ib.docID());
    if (da == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  for (int i = 0; i < 10; i++) {
    // test random skipTo() and next()
    ia = a.iterator();
    ib = b.iterator();
    int doc = -1;
    for (; ; ) {
      int da, db;
      if (rand.nextBoolean()) {
        da = ia.nextDoc();
        db = ib.nextDoc();
      } else {
        // keep in mind future edge cases like probing (increase if necessary)
        int target = doc + rand.nextInt(10) + 1;
        da = ia.advance(target);
        db = ib.advance(target);
      }
      assertEquals(da, db);
      assertEquals(ia.docID(), ib.docID());
      if (da == DocIdSetIterator.NO_MORE_DOCS) {
        break;
      }
      doc = da;
    }
  }
}
private List<Document> lookupDocs(Term term, final LoadFieldCallback lfc) throws IOException {
  final List<Document> documents = new ArrayList<Document>();
  final TermFilter tf = new TermFilter(term);
  try {
    for (AtomicReaderContext arc : searcher.getIndexReader().leaves()) {
      AtomicReader ar = arc.reader();
      Bits liveDocs = ar.getLiveDocs();
      DocIdSet docSet = tf.getDocIdSet(arc, liveDocs);
      if (docSet != null) {
        DocIdSetIterator disi = docSet.iterator();
        if (disi != null) {
          int docId;
          while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            DocumentStoredFieldVisitor fv =
                new DocumentStoredFieldVisitor() {
                  @Override
                  public StoredFieldVisitor.Status needsField(FieldInfo fieldInfo)
                      throws IOException {
                    if (lfc == null || lfc.loadField(fieldInfo.name)) {
                      return StoredFieldVisitor.Status.YES;
                    }
                    return StoredFieldVisitor.Status.NO;
                  }
                };
            ar.document(docId, fv);
            documents.add(fv.getDocument());
          }
        }
      }
    }
  } catch (IOException io) {
    throw new IndexException(io);
  }
  return documents;
}
@Test
public void testStream_docIdSetIterator() throws Exception {
  final Builder disBuilder = new Builder(10);
  disBuilder.add(1).add(3).add(6).add(7).add(8).add(10);
  final DocIdSet dis = disBuilder.build();
  Assert.assertEquals(
      "Not all document ids streamed.", 6L, StreamUtils.stream(dis.iterator()).count());
  Assert.assertEquals(
      "Document id count mismatch.",
      1L,
      StreamUtils.stream(dis.iterator()).filter(id -> id == 1).count());
  Assert.assertEquals(
      "Document id count mismatch.",
      1L,
      StreamUtils.stream(dis.iterator()).filter(id -> id == 3).count());
  Assert.assertEquals(
      "Document id count mismatch.",
      1L,
      StreamUtils.stream(dis.iterator()).filter(id -> id == 6).count());
  Assert.assertEquals(
      "Document id count mismatch.",
      1L,
      StreamUtils.stream(dis.iterator()).filter(id -> id == 7).count());
  Assert.assertEquals(
      "Document id count mismatch.",
      1L,
      StreamUtils.stream(dis.iterator()).filter(id -> id == 8).count());
  Assert.assertEquals(
      "Document id count mismatch.",
      1L,
      StreamUtils.stream(dis.iterator()).filter(id -> id == 10).count());
  Assert.assertEquals(
      "Unknown document id found.",
      0L,
      StreamUtils.stream(dis.iterator())
          .filter(id -> id != 1 && id != 3 && id != 6 && id != 7 && id != 8 && id != 10)
          .count());
}
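// The two tests above call a StreamUtils.stream(DocIdSetIterator) helper that is not part of
// this excerpt. As a hedged sketch only (the helper name, eager materialization, and exception
// handling are assumptions, not the project's actual implementation), such a helper could drain
// the iterator into a Stream<Integer>; it needs java.util.stream.Stream, java.io.IOException,
// and java.io.UncheckedIOException.
static Stream<Integer> stream(DocIdSetIterator it) {
  Stream.Builder<Integer> docs = Stream.builder();
  try {
    // Pull doc ids until the iterator is exhausted.
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      docs.add(doc);
    }
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
  return docs.build();
}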
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
  if (remaining == 0) {
    return null;
  }
  if (shortCircuitFilter != null) {
    DocIdSet docIdSet = shortCircuitFilter.getDocIdSet(context, acceptDocs);
    if (!DocIdSets.isEmpty(docIdSet)) {
      DocIdSetIterator iterator = docIdSet.iterator();
      if (iterator != null) {
        return ConstantScorer.create(iterator, this, queryWeight);
      }
    }
    return null;
  }
  DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, acceptDocs);
  if (!DocIdSets.isEmpty(parentDocIdSet)) {
    // We can't be sure that liveDocs have been applied, so we apply them here. The "remaining"
    // count down (short circuit) logic will then work as expected.
    parentDocIdSet =
        BitsFilteredDocIdSet.wrap(parentDocIdSet, context.reader().getLiveDocs());
    DocIdSetIterator innerIterator = parentDocIdSet.iterator();
    if (innerIterator != null) {
      LongBitSet parentOrds = collector.parentOrds;
      SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
      if (globalValues != null) {
        DocIdSetIterator parentIdIterator =
            new ParentOrdIterator(innerIterator, parentOrds, globalValues, this);
        return ConstantScorer.create(parentIdIterator, this, queryWeight);
      }
    }
  }
  return null;
}
IteratorBasedIterator(DocIdSet[] sets) throws IOException {
  iterators = new DocIdSetIterator[sets.length];
  int j = 0;
  long cost = Integer.MAX_VALUE;
  for (DocIdSet set : sets) {
    if (set == null) {
      lastReturn = DocIdSetIterator.NO_MORE_DOCS; // non matching
      break;
    } else {
      DocIdSetIterator dcit = set.iterator();
      if (dcit == null) {
        lastReturn = DocIdSetIterator.NO_MORE_DOCS; // non matching
        break;
      }
      iterators[j++] = dcit;
      cost = Math.min(cost, dcit.cost());
    }
  }
  this.cost = cost;
  if (lastReturn != DocIdSetIterator.NO_MORE_DOCS) {
    lastReturn = (iterators.length > 0 ? -1 : DocIdSetIterator.NO_MORE_DOCS);
  }
}
@Override
public DocIdSet getDocIdSet(IndexReader subReader) throws IOException {
  if (bases == null || !bases.containsKey(subReader)) {
    return docs;
  }
  int docBase = bases.get(subReader);
  int readerSize = subReader.maxDoc();
  OpenBitSet filter = new OpenBitSet(readerSize);
  DocIdSetIterator iterator = docs.iterator();
  int doc = iterator.advance(docBase);
  while (doc < docBase + readerSize) {
    filter.set(doc - docBase);
    doc = iterator.nextDoc();
  }
  return filter;
}
private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(
    SearchContext context,
    int nestedSubDocId,
    LeafReaderContext subReaderContext,
    DocumentMapper documentMapper,
    ObjectMapper nestedObjectMapper)
    throws IOException {
  int currentParent = nestedSubDocId;
  ObjectMapper nestedParentObjectMapper;
  StringBuilder field = new StringBuilder();
  ObjectMapper current = nestedObjectMapper;
  InternalSearchHit.InternalNestedIdentity nestedIdentity = null;
  do {
    Filter parentFilter;
    nestedParentObjectMapper = documentMapper.findParentObjectMapper(current);
    if (field.length() != 0) {
      field.insert(0, '.');
    }
    field.insert(0, current.name());
    if (nestedParentObjectMapper != null) {
      if (nestedParentObjectMapper.nested().isNested() == false) {
        current = nestedParentObjectMapper;
        continue;
      }
      parentFilter = nestedParentObjectMapper.nestedTypeFilter();
    } else {
      parentFilter = Queries.newNonNestedFilter();
    }
    Filter childFilter = nestedObjectMapper.nestedTypeFilter();
    if (childFilter == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    // We can pass down 'null' as acceptedDocs, because we're fetching a docId that matched
    // in the query phase.
    DocIdSet childDocSet = childFilter.getDocIdSet(subReaderContext, null);
    if (childDocSet == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    DocIdSetIterator childIter = childDocSet.iterator();
    if (childIter == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    BitDocIdSet parentBitSet =
        context
            .bitsetFilterCache()
            .getBitDocIdSetFilter(parentFilter)
            .getDocIdSet(subReaderContext);
    BitSet parentBits = parentBitSet.bits();
    int offset = 0;
    int nextParent = parentBits.nextSetBit(currentParent);
    for (int docId = childIter.advance(currentParent + 1);
        docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
        docId = childIter.nextDoc()) {
      offset++;
    }
    currentParent = nextParent;
    current = nestedObjectMapper = nestedParentObjectMapper;
    nestedIdentity =
        new InternalSearchHit.InternalNestedIdentity(field.toString(), offset, nestedIdentity);
    field = new StringBuilder();
  } while (current != null);
  return nestedIdentity;
}
@Override
public OpenBitSetDISI newAccumulator(int bitsetSize, DocIdSet b) throws IOException {
  return new OpenBitSetDISI(b.iterator(), bitsetSize);
}
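// A hedged usage sketch for the newAccumulator(...)/compute(...) pair shown above: intersect
// several DocIdSets by seeding an OpenBitSetDISI from the first set and AND-ing the remaining
// sets into it. The helper name `intersect` and the assumption that both methods are available
// on the same operation object are illustrative only, not taken from the source.
OpenBitSetDISI intersect(DocIdSet[] sets, int bitsetSize) throws IOException {
  OpenBitSetDISI accumulator = newAccumulator(bitsetSize, sets[0]);
  for (int i = 1; i < sets.length; i++) {
    // Each step keeps only the doc ids present in both the accumulator and the next set.
    compute(accumulator, sets[i]);
  }
  return accumulator;
}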