public static Map<String, Integer> termFrequencies(
    IndexSearcher indexSearcher,
    Query documentFilterQuery,
    String fieldName,
    String propName,
    String altName) {
  try {
    String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
    Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
    Map<String, Integer> freq = new HashMap<>();
    IndexReader indexReader = indexSearcher.getIndexReader();
    for (LeafReaderContext arc : indexReader.leaves()) {
      if (weight == null) throw new RuntimeException("weight == null");
      if (arc == null) throw new RuntimeException("arc == null");
      if (arc.reader() == null) throw new RuntimeException("arc.reader() == null");
      Scorer scorer = weight.scorer(arc, arc.reader().getLiveDocs());
      if (scorer != null) {
        while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          getFrequenciesFromTermVector(
              indexReader, scorer.docID() + arc.docBase, luceneField, freq);
        }
      }
    }
    return freq;
  } catch (IOException e) {
    throw ExUtil.wrapRuntimeException(e);
  }
}
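// Usage sketch (not part of the original source): one way termFrequencies() above might be
// called. The index path, field/property names, and filter query are hypothetical; the setup
// assumes the same Lucene 5.x-era API (LeafReaderContext, createNormalizedWeight) used above.
try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
  IndexSearcher searcher = new IndexSearcher(reader);
  // Only count term frequencies in documents matching this filter query:
  Query filter = new TermQuery(new Term("docType", "article"));
  Map<String, Integer> frequencies = termFrequencies(searcher, filter, "contents", "word", null);
  frequencies.forEach((term, count) -> System.out.println(term + "\t" + count));
}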
/**
 * Used when base query is highly constraining vs the drilldowns, or when the docs must be scored
 * at once (i.e., like BooleanScorer2, not BooleanScorer). In this case we just .next() on base
 * and .advance() on the dim filters.
 */
private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
    throws IOException {
  // if (DEBUG) {
  //   System.out.println("  doQueryFirstScoring");
  // }
  int docID = baseScorer.docID();

  nextDoc:
  while (docID != PostingsEnum.NO_MORE_DOCS) {
    if (acceptDocs != null && acceptDocs.get(docID) == false) {
      docID = baseScorer.nextDoc();
      continue;
    }
    LeafCollector failedCollector = null;
    for (DocsAndCost dim : dims) {
      // TODO: should we sort this 2nd dimension of
      // docsEnums from most frequent to least?
      if (dim.approximation.docID() < docID) {
        dim.approximation.advance(docID);
      }

      boolean matches = false;
      if (dim.approximation.docID() == docID) {
        if (dim.twoPhase == null) {
          matches = true;
        } else {
          matches = dim.twoPhase.matches();
        }
      }

      if (matches == false) {
        if (failedCollector != null) {
          // More than one dim fails on this document, so
          // it's neither a hit nor a near-miss; move to
          // next doc:
          docID = baseScorer.nextDoc();
          continue nextDoc;
        } else {
          failedCollector = dim.sidewaysLeafCollector;
        }
      }
    }

    collectDocID = docID;

    // TODO: we could score on demand instead since we are
    // daat here:
    collectScore = baseScorer.score();

    if (failedCollector == null) {
      // Hit passed all filters, so it's "real":
      collectHit(collector, dims);
    } else {
      // Hit missed exactly one filter:
      collectNearMiss(failedCollector);
    }

    docID = baseScorer.nextDoc();
  }
}
private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(
    SearchContext context,
    int nestedSubDocId,
    LeafReaderContext subReaderContext,
    DocumentMapper documentMapper,
    ObjectMapper nestedObjectMapper)
    throws IOException {
  int currentParent = nestedSubDocId;
  ObjectMapper nestedParentObjectMapper;
  ObjectMapper current = nestedObjectMapper;
  String originalName = nestedObjectMapper.name();
  InternalSearchHit.InternalNestedIdentity nestedIdentity = null;
  do {
    Query parentFilter;
    nestedParentObjectMapper = documentMapper.findParentObjectMapper(current);
    if (nestedParentObjectMapper != null) {
      if (nestedParentObjectMapper.nested().isNested() == false) {
        current = nestedParentObjectMapper;
        continue;
      }
      parentFilter = nestedParentObjectMapper.nestedTypeFilter();
    } else {
      parentFilter = Queries.newNonNestedFilter();
    }

    Query childFilter = nestedObjectMapper.nestedTypeFilter();
    if (childFilter == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    final Weight childWeight = context.searcher().createNormalizedWeight(childFilter, false);
    Scorer childScorer = childWeight.scorer(subReaderContext);
    if (childScorer == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    DocIdSetIterator childIter = childScorer.iterator();

    BitSet parentBits =
        context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext);

    int offset = 0;
    int nextParent = parentBits.nextSetBit(currentParent);
    for (int docId = childIter.advance(currentParent + 1);
        docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
        docId = childIter.nextDoc()) {
      offset++;
    }
    currentParent = nextParent;
    current = nestedObjectMapper = nestedParentObjectMapper;
    int currentPrefix = current == null ? 0 : current.name().length() + 1;
    nestedIdentity =
        new InternalSearchHit.InternalNestedIdentity(
            originalName.substring(currentPrefix), offset, nestedIdentity);
    if (current != null) {
      originalName = current.name();
    }
  } while (current != null);
  return nestedIdentity;
}
@Override
public void setScorer(Scorer scorer) {
  // System.out.println("C.setScorer scorer=" + scorer);
  // Since we invoke .score(), and the comparators likely
  // do as well, cache it so it's only "really" computed
  // once:
  this.scorer = new ScoreCachingWrappingScorer(scorer);
  for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
    comparators[compIDX].setScorer(this.scorer);
  }
  Arrays.fill(joinScorers, null);

  Queue<Scorer> queue = new LinkedList<>();
  // System.out.println("\nqueue: add top scorer=" + scorer);
  queue.add(scorer);
  while ((scorer = queue.poll()) != null) {
    // System.out.println("  poll: " + scorer + "; " + scorer.getWeight().getQuery());
    if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
      enroll(
          (ToParentBlockJoinQuery) scorer.getWeight().getQuery(),
          (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
    }

    for (ChildScorer sub : scorer.getChildren()) {
      // System.out.println("  add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
      queue.add(sub.child);
    }
  }
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
  Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
  if (scorer != null) {
    int newDoc = scorer.advance(doc);
    if (newDoc == doc) {
      float freq = scorer.freq();
      SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
      ComplexExplanation result = new ComplexExplanation();
      result.setDescription(
          "weight("
              + getQuery()
              + " in "
              + doc
              + ") ["
              + similarity.getClass().getSimpleName()
              + "], result of:");
      Explanation scoreExplanation =
          docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
      result.addDetail(scoreExplanation);
      result.setValue(scoreExplanation.getValue());
      result.setMatch(true);
      return result;
    }
  }

  return new ComplexExplanation(false, 0.0f, "no matching term");
}
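// Usage sketch (not part of the original source): Weight.explain() like the method above is
// normally reached through IndexSearcher rather than called directly. The query, field, and doc
// id are hypothetical; "searcher" is assumed to be an IndexSearcher over an already-open index
// using the same Lucene 4.x-era API (AtomicReaderContext) as the method above.
Query query = new TermQuery(new Term("body", "lucene"));
Explanation explanation = searcher.explain(query, 42); // 42: a doc id from a previous search
System.out.println(explanation.toString());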
// NOTE: acceptDocs applies (and is checked) only in the
// parent document space
@Override
public Scorer scorer(LeafReaderContext readerContext) throws IOException {
  final Scorer childScorer = childWeight.scorer(readerContext);
  if (childScorer == null) {
    // No matches
    return null;
  }

  final int firstChildDoc = childScorer.iterator().nextDoc();
  if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
    // No matches
    return null;
  }

  // NOTE: this does not take accept docs into account, the responsibility
  // to not match deleted docs is on the scorer
  final BitSet parents = parentsFilter.getBitSet(readerContext);
  if (parents == null) {
    // No matches
    return null;
  }

  return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
}
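// Usage sketch (not part of the original source): how a block-join query whose Weight produces
// the scorer above is typically built. Field names and the child query are hypothetical;
// "searcher" is assumed to be an IndexSearcher over an index whose parent/child blocks were
// written together with IndexWriter.addDocuments(...), parent last; the join API
// (QueryBitSetProducer, org.apache.lucene.search.join.ScoreMode) is the standard Lucene one.
BitSetProducer parentsFilter =
    new QueryBitSetProducer(new TermQuery(new Term("docType", "parent")));
Query childQuery = new TermQuery(new Term("skill", "java"));
ToParentBlockJoinQuery joinQuery =
    new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Max);
TopDocs parentHits = searcher.search(joinQuery, 10); // parents ranked by their best-scoring child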
/* (non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
@Override
public float score() throws IOException {
  for (int i = 0; i < valSrcScorers.length; i++) {
    vScores[i] = valSrcScorers[i].score();
  }
  return qWeight * provider.customScore(subQueryScorer.docID(), subQueryScorer.score(), vScores);
}
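// Sketch (not part of the original source) of the provider side that customScore() above
// delegates to: a CustomScoreProvider combining the subquery score with a single value-source
// score. The class name and boost formula are hypothetical; the overridden signature is the
// standard lucene-queries one, which the float[] variant used above falls back to when there is
// exactly one value source.
public class RecencyBoostProvider extends CustomScoreProvider {
  public RecencyBoostProvider(LeafReaderContext context) {
    super(context);
  }

  @Override
  public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
    // e.g. valSrcScore might come from a FunctionQuery over a "recency" field
    return subQueryScore * (1.0f + valSrcScore);
  }
}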
/** Replay the wrapped collector, but only on a selection of buckets. */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
  if (!finished) {
    throw new IllegalStateException(
        "Cannot replay yet, collection is not finished: postCollect() has not been called");
  }
  if (this.selectedBuckets != null) {
    throw new IllegalStateException("Already been replayed");
  }

  final LongHash hash = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
  for (long bucket : selectedBuckets) {
    hash.add(bucket);
  }
  this.selectedBuckets = hash;

  collector.preCollection();
  boolean needsScores = collector.needsScores();
  Weight weight = null;
  if (needsScores) {
    weight =
        aggContext
            .searchContext()
            .searcher()
            .createNormalizedWeight(aggContext.searchContext().query(), true);
  }
  for (Entry entry : entries) {
    final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
    DocIdSetIterator docIt = null;
    if (needsScores && entry.docDeltas.size() > 0) {
      Scorer scorer = weight.scorer(entry.context);
      // We don't need to check if the scorer is null
      // since we are sure that there are documents to replay (entry.docDeltas is not empty).
      docIt = scorer.iterator();
      leafCollector.setScorer(scorer);
    }
    final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
    final PackedLongValues.Iterator buckets = entry.buckets.iterator();
    int doc = 0;
    for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
      doc += docDeltaIterator.next();
      final long bucket = buckets.next();
      final long rebasedBucket = hash.find(bucket);
      if (rebasedBucket != -1) {
        if (needsScores) {
          if (docIt.docID() < doc) {
            docIt.advance(doc);
          }
          // aggregations should only be replayed on matching documents
          assert docIt.docID() == doc;
        }
        leafCollector.collect(doc, rebasedBucket);
      }
    }
  }

  collector.postCollection();
}
@Override
public float score() throws IOException {
  float score = qWeight * scorer.score() * vals.floatVal(scorer.docID());

  // Current Lucene priority queues can't handle NaN and -Infinity, so
  // map to -Float.MAX_VALUE. This conditional handles both -infinity
  // and NaN since comparisons with NaN are always false.
  return score > Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}
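// Illustration (not part of the original source) of why the single comparison above catches both
// problem values: any comparison involving NaN is false, so NaN falls through to -Float.MAX_VALUE
// just as Float.NEGATIVE_INFINITY does.
float nan = 0.0f / 0.0f; // NaN
float negInf = Float.NEGATIVE_INFINITY;
System.out.println(nan > Float.NEGATIVE_INFINITY);    // false -> mapped to -Float.MAX_VALUE
System.out.println(negInf > Float.NEGATIVE_INFINITY); // false -> mapped to -Float.MAX_VALUE
System.out.println(1.5f > Float.NEGATIVE_INFINITY);   // true  -> score kept as-is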
protected AugmentScorer(
    BoboIndexReader reader, Scorer innerScorer, ScoreAugmentFunction func, JSONObject jsonParms)
    throws IOException {
  super(innerScorer.getSimilarity());
  _innerScorer = innerScorer;
  _func = func;
  _func.initializeReader(reader, jsonParms);
}
@Override
public void collect(int doc) throws IOException {
  if (typeCache == null) {
    return;
  }

  HashedBytesArray parentUid = typeCache.idByDoc(doc);
  uidToScore.put(parentUid, scorer.score());
}
@Override
public int advance(int target) throws IOException {
  int doc = subQueryScorer.advance(target);
  if (doc != NO_MORE_DOCS) {
    for (int i = 0; i < valSrcScorers.length; i++) {
      valSrcScorers[i].advance(doc);
    }
  }
  return doc;
}
/**
 * Get a list of all Spans made available from the passed-in Scorer
 *
 * @param scorer the scorer to extract spans from
 * @param errorOnNoSpans if true, throw an error if no Spans can be extracted from the Scorer or
 *     any of its children
 * @return a List of Spans
 */
public static List<Spans> getSpans(Scorer scorer, boolean errorOnNoSpans) {
  List<Spans> spans = new ArrayList<>();
  if (scorer instanceof Spans) {
    spans.add((Spans) scorer);
    return spans;
  }
  Collection<Scorer.ChildScorer> children = scorer.getChildren();
  if (errorOnNoSpans && children.size() == 0) {
    throw new RuntimeException(
        "Couldn't extract SpanScorer from " + scorer.getClass().getCanonicalName());
  }

  for (Scorer.ChildScorer child : children) {
    spans.addAll(getSpans(child.child, errorOnNoSpans));
  }

  return spans;
}
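// Usage sketch (not part of the original source): extracting Spans from the scorer of a span
// query, one leaf at a time. The field name and terms are hypothetical; "searcher" is an assumed
// IndexSearcher, and the calls assume a Lucene version where createNormalizedWeight(Query,
// boolean) and Weight.scorer(LeafReaderContext) exist, matching other snippets in this file.
SpanQuery query =
    new SpanNearQuery(
        new SpanQuery[] {
          new SpanTermQuery(new Term("body", "open")), new SpanTermQuery(new Term("body", "source"))
        },
        1,
        true);
Weight weight = searcher.createNormalizedWeight(query, false);
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
  Scorer scorer = weight.scorer(ctx);
  if (scorer == null) {
    continue;
  }
  List<Spans> allSpans = getSpans(scorer, /* errorOnNoSpans= */ false);
  // advance the spans and read positions as needed, e.g. via collect(...) further below
}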
DocsAndCost(Scorer scorer, Collector sidewaysCollector) {
  final TwoPhaseIterator twoPhase = scorer.asTwoPhaseIterator();
  if (twoPhase == null) {
    this.approximation = scorer;
    this.twoPhase = null;
  } else {
    this.approximation = twoPhase.approximation();
    this.twoPhase = twoPhase;
  }
  this.sidewaysCollector = sidewaysCollector;
}
@Override
public void collect(int doc) throws IOException {
  if (counter >= from) {
    docs.add(new ScoreDoc(docBase + doc, trackScores ? scorer.score() : 0f));
  }
  readerState.count++;
  counter++;
  if (counter >= to) {
    throw StopCollectingException;
  }
}
@Override
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
    throws IOException {
  for (LeafReaderContext ctx : leaves) { // search each subreader
    // we force the use of Scorer (not BulkScorer) to make sure
    // that the scorer passed to LeafCollector.setScorer supports
    // Scorer.getChildren
    Scorer scorer = weight.scorer(ctx);
    if (scorer != null) {
      final LeafCollector leafCollector = collector.getLeafCollector(ctx);
      leafCollector.setScorer(scorer);
      final Bits liveDocs = ctx.reader().getLiveDocs();
      for (int doc = scorer.nextDoc();
          doc != DocIdSetIterator.NO_MORE_DOCS;
          doc = scorer.nextDoc()) {
        if (liveDocs == null || liveDocs.get(doc)) {
          leafCollector.collect(doc);
        }
      }
    }
  }
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
  Scorer scorer = scorer(context, context.reader().getLiveDocs());
  if (scorer != null) {
    int newDoc = scorer.advance(doc);
    if (newDoc == doc) {
      float score = scorer.score();
      ComplexExplanation result = new ComplexExplanation();
      result.setDescription("ImageHashLimitQuery, product of:");
      result.setValue(score);
      if (getBoost() != 1.0f) {
        result.addDetail(new Explanation(getBoost(), "boost"));
        score = score / getBoost();
      }
      result.addDetail(new Explanation(score, "image score (1/distance)"));
      result.setMatch(true);
      return result;
    }
  }

  return new ComplexExplanation(false, 0.0f, "no matching term");
}
/**
 * Collect all Spans extracted from a Scorer using a SpanCollector
 *
 * @param scorer the scorer to extract Spans from
 * @param collector the SpanCollector
 * @param errorOnNoSpans if true, throw an error if no Spans can be extracted from the Scorer or
 *     any of its children
 * @throws IOException on error
 */
public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans)
    throws IOException {
  List<Spans> allSpans = getSpans(scorer, errorOnNoSpans);
  int doc = scorer.docID();
  for (Spans spans : allSpans) {
    int spanDoc = spans.docID();
    // if the Scorer advances lazily, then not all of its subspans may be on
    // the correct document
    if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) {
      while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        spans.collect(collector);
      }
    }
  }
}
@Override
public void collect(int doc) throws IOException {
  if (values != null) {
    long ord = ordinals.getOrd(doc);
    long parentIdx = parentIdsIndex.get(ord);
    if (parentIdx < 0) {
      final BytesRef bytes = values.getValueByOrd(ord);
      final int hash = values.currentValueHash();
      parentIdx = parentIds.add(bytes, hash);
      if (parentIdx < 0) {
        parentIdx = -parentIdx - 1;
        doScore(parentIdx);
      } else {
        scores = bigArrays.grow(scores, parentIdx + 1);
        scores.set(parentIdx, scorer.score());
      }
      parentIdsIndex.set(ord, parentIdx);
    } else {
      doScore(parentIdx);
    }
  }
}
@Override
protected void collect(int doc, HashedBytesArray parentUid) throws IOException {
  float currentScore = scorer.score();
  switch (scoreType) {
    case SUM:
      uidToScore.addTo(parentUid, currentScore);
      break;
    case MAX:
      if (uidToScore.containsKey(parentUid)) {
        float previousScore = uidToScore.lget();
        if (currentScore > previousScore) {
          uidToScore.lset(currentScore);
        }
      } else {
        uidToScore.put(parentUid, currentScore);
      }
      break;
    case AVG:
      assert false : "AVG has its own collector";
    default:
      assert false : "Are we missing a score type here? -- " + scoreType;
      break;
  }
}
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int maxDoc)
    throws IOException {
  if (min != 0) {
    throw new IllegalArgumentException("min must be 0, got " + min);
  }
  if (maxDoc != Integer.MAX_VALUE) {
    throw new IllegalArgumentException("maxDoc must be Integer.MAX_VALUE");
  }
  // if (DEBUG) {
  //   System.out.println("\nscore: reader=" + context.reader());
  // }
  // System.out.println("score r=" + context.reader());
  FakeScorer scorer = new FakeScorer();
  collector.setScorer(scorer);
  if (drillDownCollector != null) {
    drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
    drillDownLeafCollector.setScorer(scorer);
  } else {
    drillDownLeafCollector = null;
  }
  for (DocsAndCost dim : dims) {
    dim.sidewaysLeafCollector = dim.sidewaysCollector.getLeafCollector(context);
    dim.sidewaysLeafCollector.setScorer(scorer);
  }

  // TODO: if we ever allow null baseScorer ... it will
  // mean we DO score docs out of order ... hmm, or if we
  // change up the order of the conjunctions below
  assert baseScorer != null;

  // some scorers, eg ReqExclScorer, can hit NPE if cost is called after nextDoc
  long baseQueryCost = baseScorer.cost();

  final int numDims = dims.length;

  long drillDownCost = 0;
  for (int dim = 0; dim < numDims; dim++) {
    drillDownCost += dims[dim].approximation.cost();
  }

  long drillDownAdvancedCost = 0;
  if (numDims > 1) {
    drillDownAdvancedCost = dims[1].approximation.cost();
  }

  // Position all scorers to their first matching doc:
  baseScorer.nextDoc();
  for (DocsAndCost dim : dims) {
    dim.approximation.nextDoc();
  }

  /*
  System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
  System.out.println("  maxDoc=" + context.reader().maxDoc());
  System.out.println("  maxCost=" + maxCost);
  System.out.println("  dims[0].freq=" + dims[0].freq);
  if (numDims > 1) {
    System.out.println("  dims[1].freq=" + dims[1].freq);
  }
  */

  if (scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10) {
    // System.out.println("queryFirst: baseScorer=" + baseScorer
    //     + " disis.length=" + disis.length + " bits.length=" + bits.length);
    doQueryFirstScoring(acceptDocs, collector, dims);
  } else if (numDims > 1 && drillDownAdvancedCost < baseQueryCost / 10) {
    // System.out.println("drillDownAdvance");
    doDrillDownAdvanceScoring(acceptDocs, collector, dims);
  } else {
    // System.out.println("union");
    doUnionScoring(acceptDocs, collector, dims);
  }

  return Integer.MAX_VALUE;
}
@Override
public void collect(int parentDoc) throws IOException {
  // System.out.println("\nC parentDoc=" + parentDoc);
  totalHitCount++;

  float score = Float.NaN;

  if (trackMaxScore) {
    score = scorer.score();
    maxScore = Math.max(maxScore, score);
  }

  // TODO: we could sweep all joinScorers here and
  // aggregate total child hit count, so we can fill this
  // in getTopGroups (we wire it to 0 now)

  if (queueFull) {
    // System.out.println("  queueFull");
    // Fastmatch: return if this hit is not competitive
    for (int i = 0; ; i++) {
      final int c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
      if (c < 0) {
        // Definitely not competitive.
        // System.out.println("    skip");
        return;
      } else if (c > 0) {
        // Definitely competitive.
        break;
      } else if (i == compEnd) {
        // Here c=0. If we're at the last comparator, this doc is not
        // competitive, since docs are visited in doc Id order, which means
        // this doc cannot compete with any other document in the queue.
        // System.out.println("    skip");
        return;
      }
    }

    // System.out.println("    competes!  doc=" + (docBase + parentDoc));

    // This hit is competitive - replace bottom element in queue & adjustTop
    for (int i = 0; i < comparators.length; i++) {
      comparators[i].copy(bottom.slot, parentDoc);
    }
    if (!trackMaxScore && trackScores) {
      score = scorer.score();
    }
    bottom.doc = docBase + parentDoc;
    bottom.readerContext = currentReaderContext;
    bottom.score = score;
    copyGroups(bottom);
    bottom = queue.updateTop();

    for (int i = 0; i < comparators.length; i++) {
      comparators[i].setBottom(bottom.slot);
    }
  } else {
    // Startup transient: queue is not yet full:
    final int comparatorSlot = totalHitCount - 1;

    // Copy hit into queue
    for (int i = 0; i < comparators.length; i++) {
      comparators[i].copy(comparatorSlot, parentDoc);
    }
    // System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
    if (!trackMaxScore && trackScores) {
      score = scorer.score();
    }
    final OneGroup og =
        new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.length, trackScores);
    og.readerContext = currentReaderContext;
    copyGroups(og);
    bottom = queue.add(og);
    queueFull = totalHitCount == numParentHits;
    if (queueFull) {
      // End of startup transient: queue just filled up:
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].setBottom(bottom.slot);
      }
    }
  }
}
/** not a direct test of NearSpans, but a demonstration of how/when this causes problems */
public void testSpanNearScorerSkipTo1() throws Exception {
  SpanNearQuery q = makeQuery();
  Weight w = q.weight(searcher);
  Scorer s = w.scorer(searcher.getIndexReader(), true, false);
  assertEquals(1, s.advance(1));
}
@Override
public void collect(int doc) throws IOException {
  if (scorer.score() > minScore) {
    count++;
  }
}
@Override
public int docID() {
  return subQueryScorer.docID();
}
@Override
public int nextDoc() throws IOException {
  return scorer.nextDoc();
}
@Override
public int advance(int target) throws IOException {
  return scorer.advance(target);
}
@Override
public int docID() {
  return scorer.docID();
}
@Override
public float score() throws IOException {
  return (_func.useInnerScore())
      ? _func.newScore(_innerScorer.score(), _innerScorer.docID())
      : _func.newScore(_innerScorer.docID());
}
@Override
public int docID() {
  return _innerScorer.docID();
}