/**
 * Used when base query is highly constraining vs the drilldowns, or when the docs must be scored
 * at once (i.e., like BooleanScorer2, not BooleanScorer). In this case we just .next() on base
 * and .advance() on the dim filters.
 */
private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
    throws IOException {
  // if (DEBUG) {
  //  System.out.println("  doQueryFirstScoring");
  // }
  int docID = baseScorer.docID();

  nextDoc:
  while (docID != PostingsEnum.NO_MORE_DOCS) {
    if (acceptDocs != null && acceptDocs.get(docID) == false) {
      docID = baseScorer.nextDoc();
      continue;
    }
    LeafCollector failedCollector = null;
    for (DocsAndCost dim : dims) {
      // TODO: should we sort this 2nd dimension of
      // docsEnums from most frequent to least?
      if (dim.approximation.docID() < docID) {
        dim.approximation.advance(docID);
      }

      boolean matches = false;
      if (dim.approximation.docID() == docID) {
        if (dim.twoPhase == null) {
          matches = true;
        } else {
          matches = dim.twoPhase.matches();
        }
      }

      if (matches == false) {
        if (failedCollector != null) {
          // More than one dim fails on this document, so
          // it's neither a hit nor a near-miss; move to
          // next doc:
          docID = baseScorer.nextDoc();
          continue nextDoc;
        } else {
          failedCollector = dim.sidewaysLeafCollector;
        }
      }
    }

    collectDocID = docID;

    // TODO: we could score on demand instead since we are
    // daat here:
    collectScore = baseScorer.score();

    if (failedCollector == null) {
      // Hit passed all filters, so it's "real":
      collectHit(collector, dims);
    } else {
      // Hit missed exactly one filter:
      collectNearMiss(failedCollector);
    }

    docID = baseScorer.nextDoc();
  }
}
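/*
 * A minimal sketch of the advance-then-confirm pattern used per dim above:
 * the cheap approximation iterator is advanced first, and the expensive
 * two-phase check runs only when the approximation lands on the target doc.
 * Uses Lucene's DocIdSetIterator/TwoPhaseIterator API; the helper method
 * itself is hypothetical and not part of the original class.
 */
private static boolean dimMatches(DocIdSetIterator approximation, TwoPhaseIterator twoPhase, int docID)
    throws IOException {
  if (approximation.docID() < docID) {
    // Skip the cheap iterator forward; never move it backwards:
    approximation.advance(docID);
  }
  if (approximation.docID() != docID) {
    return false; // approximation already rules this doc out
  }
  // Approximation matched; run the exact per-doc check if there is one:
  return twoPhase == null || twoPhase.matches();
}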
/* (non-Javadoc)
 * @see org.apache.lucene.search.Scorer#score()
 */
@Override
public float score() throws IOException {
  for (int i = 0; i < valSrcScorers.length; i++) {
    vScores[i] = valSrcScorers[i].score();
  }
  return qWeight * provider.customScore(subQueryScorer.docID(), subQueryScorer.score(), vScores);
}
@Override
public void collect(int doc) throws IOException {
  if (typeCache == null) {
    return;
  }

  HashedBytesArray parentUid = typeCache.idByDoc(doc);
  uidToScore.put(parentUid, scorer.score());
}
@Override
public float score() throws IOException {
  float score = qWeight * scorer.score() * vals.floatVal(scorer.docID());

  // Current Lucene priority queues can't handle NaN and -Infinity, so
  // map to -Float.MAX_VALUE. This conditional handles both -infinity
  // and NaN since comparisons with NaN are always false.
  return score > Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}
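/*
 * A small illustrative helper (not part of the original class) showing why the
 * single comparison above catches both problem values: every comparison with
 * NaN evaluates to false, so NaN falls through to the -Float.MAX_VALUE branch
 * exactly like Float.NEGATIVE_INFINITY does.
 */
static float clampForPriorityQueue(float score) {
  return score > Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}

// clampForPriorityQueue(Float.NaN)               -> -Float.MAX_VALUE
// clampForPriorityQueue(Float.NEGATIVE_INFINITY) -> -Float.MAX_VALUE
// clampForPriorityQueue(1.5f)                    -> 1.5f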
@Override
public void collect(int doc) throws IOException {
  if (counter >= from) {
    docs.add(new ScoreDoc(docBase + doc, trackScores ? scorer.score() : 0f));
  }
  readerState.count++;
  counter++;
  if (counter >= to) {
    // StopCollectingException is a pre-allocated exception instance (a field,
    // not a class, hence no `new` here), thrown to abort collection once the
    // requested window [from, to) has been fully consumed.
    throw StopCollectingException;
  }
}
@Override
public void collect(int doc) throws IOException {
  if (values != null) {
    long ord = ordinals.getOrd(doc);
    long parentIdx = parentIdsIndex.get(ord);
    if (parentIdx < 0) {
      final BytesRef bytes = values.getValueByOrd(ord);
      final int hash = values.currentValueHash();
      parentIdx = parentIds.add(bytes, hash);
      if (parentIdx < 0) {
        // Parent id was already seen: decode its existing index and fold
        // this child's score into the running aggregate.
        parentIdx = -parentIdx - 1;
        doScore(parentIdx);
      } else {
        // First occurrence of this parent id: record its initial score.
        scores = bigArrays.grow(scores, parentIdx + 1);
        scores.set(parentIdx, scorer.score());
      }
      parentIdsIndex.set(ord, parentIdx);
    } else {
      doScore(parentIdx);
    }
  }
}
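/*
 * A sketch of the "negative id means already present" convention the collector
 * above relies on. Lucene's BytesRefHash.add returns the new id for a
 * first-time key and -(existingId + 1) for a duplicate, so -result - 1
 * recovers the existing id. The helper below is hypothetical and just makes
 * the decoding explicit.
 */
static long decodeExistingId(long addResult) {
  // addResult >= 0 -> key was newly inserted under that id
  // addResult <  0 -> key already existed; decode its id:
  return addResult >= 0 ? addResult : -addResult - 1;
}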
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
  Scorer scorer = scorer(context, context.reader().getLiveDocs());
  if (scorer != null) {
    int newDoc = scorer.advance(doc);
    if (newDoc == doc) {
      float score = scorer.score();
      ComplexExplanation result = new ComplexExplanation();
      result.setDescription("ImageHashLimitQuery, product of:");
      result.setValue(score);
      if (getBoost() != 1.0f) {
        result.addDetail(new Explanation(getBoost(), "boost"));
        score = score / getBoost();
      }
      result.addDetail(new Explanation(score, "image score (1/distance)"));
      result.setMatch(true);
      return result;
    }
  }
  return new ComplexExplanation(false, 0.0f, "no matching term");
}
@Override
protected void collect(int doc, HashedBytesArray parentUid) throws IOException {
  float currentScore = scorer.score();
  switch (scoreType) {
    case SUM:
      uidToScore.addTo(parentUid, currentScore);
      break;
    case MAX:
      if (uidToScore.containsKey(parentUid)) {
        float previousScore = uidToScore.lget();
        if (currentScore > previousScore) {
          uidToScore.lset(currentScore);
        }
      } else {
        uidToScore.put(parentUid, currentScore);
      }
      break;
    case AVG:
      assert false : "AVG has its own collector";
      // falls through to default; AVG is handled by a dedicated collector
      // and should never reach this one
    default:
      assert false : "Are we missing a score type here? -- " + scoreType;
      break;
  }
}
@Override
public float score() throws IOException {
  return (_func.useInnerScore())
      ? _func.newScore(_innerScorer.score(), _innerScorer.docID())
      : _func.newScore(_innerScorer.docID());
}
@Override
public void collect(int parentDoc) throws IOException {
  // System.out.println("\nC parentDoc=" + parentDoc);
  totalHitCount++;

  float score = Float.NaN;

  if (trackMaxScore) {
    score = scorer.score();
    maxScore = Math.max(maxScore, score);
  }

  // TODO: we could sweep all joinScorers here and
  // aggregate total child hit count, so we can fill this
  // in getTopGroups (we wire it to 0 now)

  if (queueFull) {
    // System.out.println("  queueFull");
    // Fastmatch: return if this hit is not competitive
    for (int i = 0; ; i++) {
      final int c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
      if (c < 0) {
        // Definitely not competitive.
        // System.out.println("    skip");
        return;
      } else if (c > 0) {
        // Definitely competitive.
        break;
      } else if (i == compEnd) {
        // Here c=0. If we're at the last comparator, this doc is not
        // competitive, since docs are visited in doc Id order, which means
        // this doc cannot compete with any other document in the queue.
        // System.out.println("    skip");
        return;
      }
    }

    // System.out.println("    competes! doc=" + (docBase + parentDoc));

    // This hit is competitive - replace bottom element in queue & adjustTop
    for (int i = 0; i < comparators.length; i++) {
      comparators[i].copy(bottom.slot, parentDoc);
    }
    if (!trackMaxScore && trackScores) {
      score = scorer.score();
    }
    bottom.doc = docBase + parentDoc;
    bottom.readerContext = currentReaderContext;
    bottom.score = score;
    copyGroups(bottom);
    bottom = queue.updateTop();

    for (int i = 0; i < comparators.length; i++) {
      comparators[i].setBottom(bottom.slot);
    }
  } else {
    // Startup transient: queue is not yet full:
    final int comparatorSlot = totalHitCount - 1;

    // Copy hit into queue
    for (int i = 0; i < comparators.length; i++) {
      comparators[i].copy(comparatorSlot, parentDoc);
    }
    // System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
    if (!trackMaxScore && trackScores) {
      score = scorer.score();
    }
    final OneGroup og =
        new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.length, trackScores);
    og.readerContext = currentReaderContext;
    copyGroups(og);
    bottom = queue.add(og);
    queueFull = totalHitCount == numParentHits;
    if (queueFull) {
      // End of startup transient: queue just filled up:
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].setBottom(bottom.slot);
      }
    }
  }
}
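/*
 * A self-contained sketch (hypothetical helper, plain int sort keys) of the
 * fastmatch loop above. Lucene's FieldComparator.compareBottom returns a
 * positive value when the candidate sorts before the current queue bottom, and
 * reverseMul holds 1 or -1 per field to flip descending sorts. A full tie
 * loses because hits arrive in increasing docID order, and docID is the
 * implicit final tie-break.
 */
static boolean isCompetitive(int[] candidate, int[] bottom, int[] reverseMul) {
  for (int i = 0; i < candidate.length; i++) {
    // Positive c: candidate beats the current bottom on this key.
    int c = reverseMul[i] * Integer.compare(bottom[i], candidate[i]);
    if (c < 0) {
      return false; // definitely not competitive
    } else if (c > 0) {
      return true; // definitely competitive
    }
    // c == 0: tied on this key; consult the next comparator
  }
  return false; // tied on every key: the later docID is not competitive
}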
@Override
public void collect(int doc) throws IOException {
  if (scorer.score() > minScore) {
    count++;
  }
}
private void doUnionScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
    throws IOException {
  // if (DEBUG) {
  //  System.out.println("  doUnionScoring");
  // }
  final int maxDoc = context.reader().maxDoc();
  final int numDims = dims.length;

  // TODO: maybe a class like BS, instead of parallel arrays
  int[] filledSlots = new int[CHUNK];
  int[] docIDs = new int[CHUNK];
  float[] scores = new float[CHUNK];
  int[] missingDims = new int[CHUNK];
  int[] counts = new int[CHUNK];

  docIDs[0] = -1;

  // NOTE: this is basically a specialized version of
  // BooleanScorer, to the minShouldMatch=N-1 case, but
  // carefully tracking which dimension failed to match

  int nextChunkStart = CHUNK;

  while (true) {
    // if (DEBUG) {
    //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
    // }
    int filledCount = 0;
    int docID = baseScorer.docID();
    // if (DEBUG) {
    //  System.out.println("  base docID=" + docID);
    // }
    while (docID < nextChunkStart) {
      if (acceptDocs == null || acceptDocs.get(docID)) {
        int slot = docID & MASK;
        // if (DEBUG) {
        //  System.out.println("    docIDs[slot=" + slot + "]=" + docID + " id=" + context.reader().document(docID).get("id"));
        // }

        // Mark slot as valid:
        assert docIDs[slot] != docID : "slot=" + slot + " docID=" + docID;
        docIDs[slot] = docID;
        scores[slot] = baseScorer.score();
        filledSlots[filledCount++] = slot;
        missingDims[slot] = 0;
        counts[slot] = 1;
      }
      docID = baseScorer.nextDoc();
    }

    if (filledCount == 0) {
      if (nextChunkStart >= maxDoc) {
        break;
      }
      nextChunkStart += CHUNK;
      continue;
    }

    // First drill-down dim, basically adds SHOULD onto
    // the baseQuery:

    // if (DEBUG) {
    //  System.out.println("  dim=0 [" + dims[0].dim + "]");
    // }
    {
      DocsAndCost dc = dims[0];
      docID = dc.approximation.docID();
      // if (DEBUG) {
      //  System.out.println("    start docID=" + docID);
      // }
      while (docID < nextChunkStart) {
        int slot = docID & MASK;
        if (docIDs[slot] == docID // this also checks that the doc is not deleted
            && (dc.twoPhase == null || dc.twoPhase.matches())) {
          // if (DEBUG) {
          //  System.out.println("      set docID=" + docID + " count=2");
          // }
          missingDims[slot] = 1;
          counts[slot] = 2;
        }
        docID = dc.approximation.nextDoc();
      }
    }

    for (int dim = 1; dim < numDims; dim++) {
      // if (DEBUG) {
      //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
      // }

      DocsAndCost dc = dims[dim];
      docID = dc.approximation.docID();
      // if (DEBUG) {
      //  System.out.println("    start docID=" + docID);
      // }
      while (docID < nextChunkStart) {
        int slot = docID & MASK;
        if (docIDs[slot] == docID // also means that the doc is not deleted
            && counts[slot] >= dim
            && (dc.twoPhase == null || dc.twoPhase.matches())) {
          // This doc is still in the running...
          // TODO: single-valued dims will always be true
          // below; we could somehow specialize
          if (missingDims[slot] >= dim) {
            // if (DEBUG) {
            //  System.out.println("      set docID=" + docID + " count=" + (dim+2));
            // }
            missingDims[slot] = dim + 1;
            counts[slot] = dim + 2;
          } else {
            // if (DEBUG) {
            //  System.out.println("      set docID=" + docID + " missing count=" + (dim+1));
            // }
            counts[slot] = dim + 1;
          }
        }
        docID = dc.approximation.nextDoc();
      }
    }

    // Collect:
    // System.out.println("  now collect: " + filledCount + " hits");
    for (int i = 0; i < filledCount; i++) {
      // NOTE: This is actually in-order collection,
      // because we only accept docs originally returned by
      // the baseScorer (ie that Scorer is AND'd)
      int slot = filledSlots[i];
      collectDocID = docIDs[slot];
      collectScore = scores[slot];
      // if (DEBUG) {
      //  System.out.println("  docID=" + docIDs[slot] + " count=" + counts[slot]);
      // }
      // System.out.println("    collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + " main.doc=" + collectDocID + " exactCount=" + numDims);
      if (counts[slot] == 1 + numDims) {
        // System.out.println("      hit");
        collectHit(collector, dims);
      } else if (counts[slot] == numDims) {
        // System.out.println("      sw");
        collectNearMiss(dims[missingDims[slot]].sidewaysLeafCollector);
      }
    }

    if (nextChunkStart >= maxDoc) {
      break;
    }

    nextChunkStart += CHUNK;
  }
}
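/*
 * A minimal sketch of the chunked slot addressing used by doUnionScoring (and
 * by doDrillDownAdvanceScoring below), assuming CHUNK is a power of two and
 * MASK == CHUNK - 1, as in this class; the constant value here is illustrative.
 * Within one chunk [nextChunkStart - CHUNK, nextChunkStart), each docID maps to
 * a unique slot, so `docIDs[slot] == docID` suffices to test whether the slot
 * was filled during the current chunk.
 */
static final int CHUNK = 2048; // must be a power of two
static final int MASK = CHUNK - 1;

static int slotOf(int docID) {
  return docID & MASK; // same result as docID % CHUNK, without the division
}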
/** Used when drill downs are highly constraining vs baseQuery. */
private void doDrillDownAdvanceScoring(
    Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims) throws IOException {
  final int maxDoc = context.reader().maxDoc();
  final int numDims = dims.length;

  // if (DEBUG) {
  //  System.out.println("  doDrillDownAdvanceScoring");
  // }

  // TODO: maybe a class like BS, instead of parallel arrays
  int[] filledSlots = new int[CHUNK];
  int[] docIDs = new int[CHUNK];
  float[] scores = new float[CHUNK];
  int[] missingDims = new int[CHUNK];
  int[] counts = new int[CHUNK];

  docIDs[0] = -1;
  int nextChunkStart = CHUNK;

  final FixedBitSet seen = new FixedBitSet(CHUNK);

  while (true) {
    // if (DEBUG) {
    //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
    // }

    // First dim:
    // if (DEBUG) {
    //  System.out.println("  dim0");
    // }
    DocsAndCost dc = dims[0];
    int docID = dc.approximation.docID();
    while (docID < nextChunkStart) {
      if (acceptDocs == null || acceptDocs.get(docID)) {
        int slot = docID & MASK;

        if (docIDs[slot] != docID && (dc.twoPhase == null || dc.twoPhase.matches())) {
          seen.set(slot);
          // Mark slot as valid:
          // if (DEBUG) {
          //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
          // }
          docIDs[slot] = docID;
          missingDims[slot] = 1;
          counts[slot] = 1;
        }
      }
      docID = dc.approximation.nextDoc();
    }

    // Second dim:
    // if (DEBUG) {
    //  System.out.println("  dim1");
    // }
    dc = dims[1];
    docID = dc.approximation.docID();
    while (docID < nextChunkStart) {
      if (acceptDocs == null
          || (acceptDocs.get(docID) && (dc.twoPhase == null || dc.twoPhase.matches()))) {
        int slot = docID & MASK;

        if (docIDs[slot] != docID) {
          // Mark slot as valid:
          seen.set(slot);
          // if (DEBUG) {
          //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
          // }
          docIDs[slot] = docID;
          missingDims[slot] = 0;
          counts[slot] = 1;
        } else {
          // TODO: single-valued dims will always be true
          // below; we could somehow specialize
          if (missingDims[slot] >= 1) {
            missingDims[slot] = 2;
            counts[slot] = 2;
            // if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
            // }
          } else {
            counts[slot] = 1;
            // if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
            // }
          }
        }
      }
      docID = dc.approximation.nextDoc();
    }

    // After this we can "upgrade" to conjunction, because
    // any doc not seen by either dim 0 or dim 1 cannot be
    // a hit or a near miss:

    // if (DEBUG) {
    //  System.out.println("  baseScorer");
    // }

    // Fold in baseScorer, using advance:
    int filledCount = 0;
    int slot0 = 0;
    while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != DocIdSetIterator.NO_MORE_DOCS) {
      int ddDocID = docIDs[slot0];
      assert ddDocID != -1;

      int baseDocID = baseScorer.docID();
      if (baseDocID < ddDocID) {
        baseDocID = baseScorer.advance(ddDocID);
      }
      if (baseDocID == ddDocID) {
        // if (DEBUG) {
        //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
        // }
        scores[slot0] = baseScorer.score();
        filledSlots[filledCount++] = slot0;
        counts[slot0]++;
      } else {
        // if (DEBUG) {
        //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
        // }
        docIDs[slot0] = -1;

        // TODO: we could jump slot0 forward to the
        // baseDocID ... but we'd need to set docIDs for
        // intervening slots to -1
      }
      slot0++;
    }
    seen.clear(0, CHUNK);

    if (filledCount == 0) {
      if (nextChunkStart >= maxDoc) {
        break;
      }
      nextChunkStart += CHUNK;
      continue;
    }

    // TODO: factor this out & share w/ union scorer,
    // except we start from dim=2 instead:
    for (int dim = 2; dim < numDims; dim++) {
      // if (DEBUG) {
      //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
      // }
      dc = dims[dim];
      docID = dc.approximation.docID();
      while (docID < nextChunkStart) {
        int slot = docID & MASK;
        if (docIDs[slot] == docID
            && counts[slot] >= dim
            && (dc.twoPhase == null || dc.twoPhase.matches())) {
          // TODO: single-valued dims will always be true
          // below; we could somehow specialize
          if (missingDims[slot] >= dim) {
            // if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
            // }
            missingDims[slot] = dim + 1;
            counts[slot] = dim + 2;
          } else {
            // if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
            // }
            counts[slot] = dim + 1;
          }
        }

        // TODO: sometimes use advance?
        docID = dc.approximation.nextDoc();
      }
    }

    // Collect:
    // if (DEBUG) {
    //  System.out.println("  now collect: " + filledCount + " hits");
    // }
    for (int i = 0; i < filledCount; i++) {
      int slot = filledSlots[i];
      collectDocID = docIDs[slot];
      collectScore = scores[slot];
      // if (DEBUG) {
      //  System.out.println("  docID=" + docIDs[slot] + " count=" + counts[slot]);
      // }
      if (counts[slot] == 1 + numDims) {
        collectHit(collector, dims);
      } else if (counts[slot] == numDims) {
        collectNearMiss(dims[missingDims[slot]].sidewaysLeafCollector);
      }
    }

    if (nextChunkStart >= maxDoc) {
      break;
    }

    nextChunkStart += CHUNK;
  }
}
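/*
 * A compact illustration (hypothetical helper, not in the original class) of
 * the counting invariant shared by doUnionScoring and doDrillDownAdvanceScoring:
 * counts[slot] is 1 for the base-query match plus 1 per matching dim, and
 * missingDims[slot] remembers the single dim that is allowed to fail. With
 * numDims dims, counts == numDims + 1 is a full hit, counts == numDims is a
 * near-miss on exactly dims[missingDims[slot]], and anything lower failed two
 * or more dims and is discarded.
 */
static void dispatch(int count, int missingDim, int numDims) {
  if (count == 1 + numDims) {
    System.out.println("hit: matched base query and all " + numDims + " dims");
  } else if (count == numDims) {
    System.out.println("near-miss: failed only dim " + missingDim);
  } else {
    System.out.println("dropped: failed two or more dims");
  }
}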