Esempio n. 1
0
  /**
   * Scores "query first": steps the base query with {@code nextDoc()} and, for every candidate doc,
   * positions each drill-down dimension on it with {@code advance()}. Used when the base query is
   * highly constraining vs the drilldowns, or when the docs must be scored at once (i.e., like
   * BooleanScorer2, not BooleanScorer).
   *
   * <p>A doc accepted by every dimension is passed to {@code collectHit}; a doc rejected by exactly
   * one dimension is passed to {@code collectNearMiss} with that dimension's sideways collector; a
   * doc rejected by two or more dimensions is skipped.
   *
   * @param acceptDocs docs allowed to match, or {@code null} to allow every doc
   * @param collector collector handed to {@code collectHit} for full hits
   * @param dims drill-down dimensions to test each base doc against
   * @throws IOException if the underlying iterators or scorer fail
   */
  private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
      throws IOException {
    nextDoc:
    for (int docID = baseScorer.docID();
        docID != PostingsEnum.NO_MORE_DOCS;
        docID = baseScorer.nextDoc()) {
      if (acceptDocs != null && !acceptDocs.get(docID)) {
        continue;
      }

      // Sideways collector of the one dim that rejected this doc so far (null = none yet):
      LeafCollector failedCollector = null;
      for (DocsAndCost dim : dims) {
        // TODO: should we sort this 2nd dimension of
        // docsEnums from most frequent to least?
        if (dim.approximation.docID() < docID) {
          dim.approximation.advance(docID);
        }

        // The approximation must land exactly on docID, and the two-phase check (if the dim has
        // one) must confirm it:
        final boolean matches =
            dim.approximation.docID() == docID
                && (dim.twoPhase == null || dim.twoPhase.matches());
        if (matches) {
          continue;
        }

        if (failedCollector != null) {
          // More than one dim fails on this document, so
          // it's neither a hit nor a near-miss; move to
          // next doc:
          continue nextDoc;
        }
        failedCollector = dim.sidewaysLeafCollector;
      }

      collectDocID = docID;

      // TODO: we could score on demand instead since we are
      // daat here:
      collectScore = baseScorer.score();

      if (failedCollector != null) {
        // Hit missed exactly one filter:
        collectNearMiss(failedCollector);
      } else {
        // Hit passed all filters, so it's "real":
        collectHit(collector, dims);
      }
    }
  }
 /*
  * (non-Javadoc) @see org.apache.lucene.search.Scorer#score()
  *
  * Refreshes vScores from every value-source scorer, then hands the sub-query's doc ID and score,
  * together with vScores, to provider.customScore and scales the result by qWeight.
  */
 @Override
 public float score() throws IOException {
   final int numSources = valSrcScorers.length;
   int src = 0;
   while (src < numSources) {
     vScores[src] = valSrcScorers[src].score();
     src++;
   }

   final int doc = subQueryScorer.docID();
   final float subQueryScore = subQueryScorer.score();
   return qWeight * provider.customScore(doc, subQueryScore, vScores);
 }
Esempio n. 3
0
    /**
     * Stores the current score of {@code doc} in {@code uidToScore}, keyed by the uid that
     * {@code typeCache} reports for the doc. Does nothing when no type cache is available.
     */
    @Override
    public void collect(int doc) throws IOException {
      if (typeCache != null) {
        final HashedBytesArray uid = typeCache.idByDoc(doc);
        uidToScore.put(uid, scorer.score());
      }
    }
    /**
     * Returns the wrapped scorer's score multiplied by {@code qWeight} and by the function value of
     * the current doc, with NaN and -Infinity replaced by {@code -Float.MAX_VALUE}.
     */
    @Override
    public float score() throws IOException {
      final float boosted = qWeight * scorer.score() * vals.floatVal(scorer.docID());

      // Current Lucene priority queues can't handle NaN and -Infinity, so
      // map to -Float.MAX_VALUE. A single "greater than" test rejects both,
      // since every comparison against NaN is false.
      if (boosted > Float.NEGATIVE_INFINITY) {
        return boosted;
      }
      return -Float.MAX_VALUE;
    }
 /**
  * Records the doc once {@code counter} has reached {@code from}, bumps both the per-reader count
  * and the running counter, and throws {@code StopCollectingException} as soon as the counter
  * reaches {@code to}. The recorded score is 0 unless {@code trackScores} is set.
  */
 @Override
 public void collect(int doc) throws IOException {
   final boolean inWindow = counter >= from;
   if (inWindow) {
     final float score = trackScores ? scorer.score() : 0f;
     docs.add(new ScoreDoc(docBase + doc, score));
   }

   readerState.count++;
   if (++counter >= to) {
     throw StopCollectingException;
   }
 }
Esempio n. 6
0
 /**
  * Resolves the parent index for the ordinal of {@code doc} and records the current score
  * against it.
  *
  * <p>{@code parentIdsIndex} caches the ordinal-to-parent-index mapping. On a cache miss (negative
  * entry) the ordinal's value is added to {@code parentIds}: a negative return is decoded as
  * {@code -idx - 1} and scored through {@code doScore}; a non-negative return grows {@code scores}
  * and stores the current score directly. The resolved index is then cached.
  */
 @Override
 public void collect(int doc) throws IOException {
   if (values == null) {
     return;
   }

   final long ord = ordinals.getOrd(doc);
   final long cachedIdx = parentIdsIndex.get(ord);
   if (cachedIdx >= 0) {
     // Ordinal already resolved on an earlier call.
     doScore(cachedIdx);
     return;
   }

   final BytesRef bytes = values.getValueByOrd(ord);
   final int hash = values.currentValueHash();
   long parentIdx = parentIds.add(bytes, hash);
   if (parentIdx >= 0) {
     scores = bigArrays.grow(scores, parentIdx + 1);
     scores.set(parentIdx, scorer.score());
   } else {
     // Negative result encodes an index as -(idx + 1); presumably the key was already
     // present -- confirm against the parentIds implementation.
     parentIdx = -parentIdx - 1;
     doScore(parentIdx);
   }
   parentIdsIndex.set(ord, parentIdx);
 }
    /**
     * Explains the score of {@code doc}. If a scorer exists and {@code advance(doc)} lands exactly
     * on {@code doc}, the result is a matching explanation carrying the score, an optional "boost"
     * detail (only when the boost differs from 1), and the score with the boost divided out.
     * Otherwise a non-matching "no matching term" explanation is returned.
     */
    @Override
    public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
      final Scorer docScorer = scorer(context, context.reader().getLiveDocs());
      if (docScorer == null || docScorer.advance(doc) != doc) {
        return new ComplexExplanation(false, 0.0f, "no matching term");
      }

      float imageScore = docScorer.score();
      final ComplexExplanation explanation = new ComplexExplanation();
      explanation.setDescription("ImageHashLimitQuery, product of:");
      explanation.setValue(imageScore);
      if (getBoost() != 1.0f) {
        explanation.addDetail(new Explanation(getBoost(), "boost"));
        // Report the un-boosted component separately from the boost itself.
        imageScore /= getBoost();
      }
      explanation.addDetail(new Explanation(imageScore, "image score (1/distance)"));
      explanation.setMatch(true);
      return explanation;
    }
Esempio n. 8
0
 /**
  * Folds the current score into {@code uidToScore} under {@code parentUid} according to
  * {@code scoreType}: SUM adds it to the stored value, MAX keeps the larger of the stored and
  * current scores. AVG and any unknown type trip an assertion.
  */
 @Override
 protected void collect(int doc, HashedBytesArray parentUid) throws IOException {
   final float childScore = scorer.score();
   switch (scoreType) {
     case MAX:
       // NOTE(review): lget()/lset() presumably act on the entry located by the containsKey
       // call just before them -- confirm against the map implementation.
       if (!uidToScore.containsKey(parentUid)) {
         uidToScore.put(parentUid, childScore);
       } else if (childScore > uidToScore.lget()) {
         uidToScore.lset(childScore);
       }
       break;
     case SUM:
       uidToScore.addTo(parentUid, childScore);
       break;
     case AVG:
       assert false : "AVG has its own collector";
       // falls through
     default:
       assert false : "Are we missing a score type here? -- " + scoreType;
       break;
   }
 }
Esempio n. 9
0
 /**
  * Delegates scoring to {@code _func}: when the function wants the inner score it receives both
  * the inner scorer's score and doc ID, otherwise only the doc ID.
  */
 @Override
 public float score() throws IOException {
   if (_func.useInnerScore()) {
     return _func.newScore(_innerScorer.score(), _innerScorer.docID());
   }
   return _func.newScore(_innerScorer.docID());
 }
  /**
   * Collects one parent document: counts it, optionally folds its score into {@code maxScore},
   * and places it in the queue if it is competitive.
   *
   * <p>While the queue is still filling, every doc is added in arrival order. Once it is full, the
   * doc is compared against the current bottom entry through the comparators and either dropped or
   * written over the bottom entry.
   *
   * @param parentDoc segment-relative ID of the parent doc ({@code docBase} is added when stored)
   * @throws IOException if scoring or a comparator fails
   */
  @Override
  public void collect(int parentDoc) throws IOException {
    // System.out.println("\nC parentDoc=" + parentDoc);
    totalHitCount++;

    // Stays NaN unless trackMaxScore or trackScores asks for the score to be computed.
    float score = Float.NaN;

    if (trackMaxScore) {
      score = scorer.score();
      maxScore = Math.max(maxScore, score);
    }

    // TODO: we could sweep all joinScorers here and
    // aggregate total child hit count, so we can fill this
    // in getTopGroups (we wire it to 0 now)

    if (queueFull) {
      // System.out.println("  queueFull");
      // Fastmatch: return if this hit is not competitive
      // (the loop has no bound of its own; it always exits via return/break by i == compEnd)
      for (int i = 0; ; i++) {
        final int c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
        if (c < 0) {
          // Definitely not competitive.
          // System.out.println("    skip");
          return;
        } else if (c > 0) {
          // Definitely competitive.
          break;
        } else if (i == compEnd) {
          // Here c=0. If we're at the last comparator, this doc is not
          // competitive, since docs are visited in doc Id order, which means
          // this doc cannot compete with any other document in the queue.
          // System.out.println("    skip");
          return;
        }
      }

      // System.out.println("    competes!  doc=" + (docBase + parentDoc));

      // This hit is competitive - replace bottom element in queue & adjustTop
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].copy(bottom.slot, parentDoc);
      }
      // Score was not computed above; compute it now only because the entry is being kept.
      if (!trackMaxScore && trackScores) {
        score = scorer.score();
      }
      bottom.doc = docBase + parentDoc;
      bottom.readerContext = currentReaderContext;
      bottom.score = score;
      copyGroups(bottom);
      bottom = queue.updateTop();

      // The bottom entry changed, so every comparator must be told the new bottom slot.
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].setBottom(bottom.slot);
      }
    } else {
      // Startup transient: queue is not yet full:
      // (slots are handed out in arrival order: the first hit gets slot 0)
      final int comparatorSlot = totalHitCount - 1;

      // Copy hit into queue
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].copy(comparatorSlot, parentDoc);
      }
      // System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
      if (!trackMaxScore && trackScores) {
        score = scorer.score();
      }
      final OneGroup og =
          new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.length, trackScores);
      og.readerContext = currentReaderContext;
      copyGroups(og);
      bottom = queue.add(og);
      // The queue becomes full once numParentHits docs have been collected.
      queueFull = totalHitCount == numParentHits;
      if (queueFull) {
        // End of startup transient: queue just filled up:
        for (int i = 0; i < comparators.length; i++) {
          comparators[i].setBottom(bottom.slot);
        }
      }
    }
  }
Esempio n. 11
0
 /** Increments {@code count} when the current score is strictly greater than {@code minScore}. */
 @Override
 public void collect(int doc) throws IOException {
   final float current = scorer.score();
   if (current > minScore) {
     count++;
   }
 }
Esempio n. 12
0
  /**
   * Scores in windows of {@code CHUNK} doc IDs: first the base scorer fills per-slot state for the
   * window, then each drill-down dimension is swept over the same window, and finally the filled
   * slots are collected as hits or near-misses.
   *
   * <p>Per-slot bookkeeping (slot = {@code docID & MASK}; NOTE(review): MASK is presumably
   * {@code CHUNK - 1} -- confirm at its declaration):
   *
   * <ul>
   *   <li>{@code counts[slot]} starts at 1 for the base match and is raised as dimensions match.
   *   <li>{@code missingDims[slot]} starts at 0 and is advanced to {@code dim + 1} only while every
   *       earlier dimension has matched, so for a near-miss it ends up indexing the one dimension
   *       that did not match.
   * </ul>
   *
   * <p>A slot with {@code counts == 1 + numDims} is a hit; {@code counts == numDims} is a
   * near-miss; anything lower is dropped.
   *
   * @param acceptDocs docs allowed to match, or {@code null} to allow every doc
   * @param collector collector handed to {@code collectHit} for full hits
   * @param dims drill-down dimensions; {@code dims[0]} is read unconditionally
   * @throws IOException if the underlying iterators or scorer fail
   */
  private void doUnionScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
      throws IOException {
    // if (DEBUG) {
    //  System.out.println("  doUnionScoring");
    // }

    final int maxDoc = context.reader().maxDoc();
    final int numDims = dims.length;

    // TODO: maybe a class like BS, instead of parallel arrays
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // int arrays are zero-initialized, so without this slot 0 would look like it already
    // holds doc 0:
    docIDs[0] = -1;

    // NOTE: this is basically a specialized version of
    // BooleanScorer, to the minShouldMatch=N-1 case, but
    // carefully tracking which dimension failed to match

    // Exclusive upper bound (in doc IDs) of the window currently being processed:
    int nextChunkStart = CHUNK;

    while (true) {
      // if (DEBUG) {
      //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" +
      // docIDs[0]);
      // }
      int filledCount = 0;
      int docID = baseScorer.docID();
      // if (DEBUG) {
      //  System.out.println("  base docID=" + docID);
      // }
      // Base query pass: claim a slot for every accepted base doc inside the window.
      while (docID < nextChunkStart) {
        if (acceptDocs == null || acceptDocs.get(docID)) {
          int slot = docID & MASK;
          // if (DEBUG) {
          //  System.out.println("    docIDs[slot=" + slot + "]=" + docID + " id=" +
          // context.reader().document(docID).get("id"));
          // }

          // Mark slot as valid:
          assert docIDs[slot] != docID : "slot=" + slot + " docID=" + docID;
          docIDs[slot] = docID;
          scores[slot] = baseScorer.score();
          filledSlots[filledCount++] = slot;
          missingDims[slot] = 0;
          counts[slot] = 1;
        }
        docID = baseScorer.nextDoc();
      }

      // Nothing from the base query in this window: skip the dimension passes entirely.
      if (filledCount == 0) {
        if (nextChunkStart >= maxDoc) {
          break;
        }
        nextChunkStart += CHUNK;
        continue;
      }

      // First drill-down dim, basically adds SHOULD onto
      // the baseQuery:
      // if (DEBUG) {
      //  System.out.println("  dim=0 [" + dims[0].dim + "]");
      // }
      {
        DocsAndCost dc = dims[0];
        docID = dc.approximation.docID();
        // if (DEBUG) {
        //  System.out.println("    start docID=" + docID);
        // }
        while (docID < nextChunkStart) {
          int slot = docID & MASK;
          if (docIDs[slot] == docID // this also checks that the doc is not deleted
              && (dc.twoPhase == null || dc.twoPhase.matches())) {
            // if (DEBUG) {
            //  System.out.println("      set docID=" + docID + " count=2");
            // }
            missingDims[slot] = 1;
            counts[slot] = 2;
          }
          docID = dc.approximation.nextDoc();
        }
      }

      // Remaining dims: a slot is only updated while counts[slot] >= dim, i.e. while it has
      // missed at most one of the dims seen so far.
      for (int dim = 1; dim < numDims; dim++) {
        // if (DEBUG) {
        //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
        // }

        DocsAndCost dc = dims[dim];
        docID = dc.approximation.docID();
        // if (DEBUG) {
        //  System.out.println("    start docID=" + docID);
        // }
        while (docID < nextChunkStart) {
          int slot = docID & MASK;
          if (docIDs[slot] == docID // also means that the doc is not deleted
              && counts[slot] >= dim
              && (dc.twoPhase == null || dc.twoPhase.matches())) {
            // This doc is still in the running...
            // TODO: single-valued dims will always be true
            // below; we could somehow specialize
            if (missingDims[slot] >= dim) {
              // Every earlier dim matched too, so no dim is missing yet.
              // if (DEBUG) {
              //  System.out.println("      set docID=" + docID + " count=" + (dim+2));
              // }
              missingDims[slot] = dim + 1;
              counts[slot] = dim + 2;
            } else {
              // One earlier dim was missed; missingDims keeps pointing at it.
              // if (DEBUG) {
              //  System.out.println("      set docID=" + docID + " missing count=" + (dim+1));
              // }
              counts[slot] = dim + 1;
            }
          }
          docID = dc.approximation.nextDoc();
        }
      }

      // Collect:
      // System.out.println("  now collect: " + filledCount + " hits");
      for (int i = 0; i < filledCount; i++) {
        // NOTE: This is actually in-order collection,
        // because we only accept docs originally returned by
        // the baseScorer (ie that Scorer is AND'd)
        int slot = filledSlots[i];
        collectDocID = docIDs[slot];
        collectScore = scores[slot];
        // if (DEBUG) {
        //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
        // }
        // System.out.println("  collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + "
        // main.doc=" + collectDocID + " exactCount=" + numDims);
        if (counts[slot] == 1 + numDims) {
          // Base query plus every dim matched.
          // System.out.println("    hit");
          collectHit(collector, dims);
        } else if (counts[slot] == numDims) {
          // Exactly one dim missed; route to that dim's sideways collector.
          // System.out.println("    sw");
          collectNearMiss(dims[missingDims[slot]].sidewaysLeafCollector);
        }
      }

      if (nextChunkStart >= maxDoc) {
        break;
      }

      nextChunkStart += CHUNK;
    }
  }
Esempio n. 13
0
  /**
   * Used when drill downs are highly constraining vs baseQuery.
   *
   * <p>Works in windows of {@code CHUNK} doc IDs. The first two dimensions are swept over the
   * window first and mark every slot either of them matches; the base scorer is then advanced only
   * to those marked docs; the remaining dimensions are swept last. A slot whose final count is
   * {@code 1 + numDims} is collected as a hit, one whose count is {@code numDims} as a near-miss
   * into the sideways collector of {@code dims[missingDims[slot]]}.
   *
   * <p>{@code dims[0]} and {@code dims[1]} are read unconditionally, so callers must pass at least
   * two dimensions.
   *
   * @param acceptDocs docs allowed to match, or {@code null} to allow every doc
   * @param collector collector handed to {@code collectHit} for full hits
   * @param dims drill-down dimensions (at least two)
   * @throws IOException if the underlying iterators or scorer fail
   */
  private void doDrillDownAdvanceScoring(
      Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims) throws IOException {
    final int maxDoc = context.reader().maxDoc();
    final int numDims = dims.length;

    // TODO: maybe a class like BS, instead of parallel arrays
    int[] filledSlots = new int[CHUNK];
    int[] docIDs = new int[CHUNK];
    float[] scores = new float[CHUNK];
    int[] missingDims = new int[CHUNK];
    int[] counts = new int[CHUNK];

    // int arrays are zero-initialized, so without this slot 0 would look like it already
    // holds doc 0:
    docIDs[0] = -1;
    // Exclusive upper bound (in doc IDs) of the window currently being processed:
    int nextChunkStart = CHUNK;

    // Slots touched by dim 0 or dim 1 in the current window:
    final FixedBitSet seen = new FixedBitSet(CHUNK);

    while (true) {
      // First dim:
      DocsAndCost dc = dims[0];
      int docID = dc.approximation.docID();
      while (docID < nextChunkStart) {
        if (acceptDocs == null || acceptDocs.get(docID)) {
          int slot = docID & MASK;

          if (docIDs[slot] != docID && (dc.twoPhase == null || dc.twoPhase.matches())) {
            seen.set(slot);
            // Mark slot as valid:
            docIDs[slot] = docID;
            missingDims[slot] = 1;
            counts[slot] = 1;
          }
        }

        docID = dc.approximation.nextDoc();
      }

      // Second dim:
      dc = dims[1];
      docID = dc.approximation.docID();
      while (docID < nextChunkStart) {
        // The accept-docs test must be grouped on its own: without the parentheses, && binds
        // tighter than ||, and a null acceptDocs would bypass the two-phase confirmation,
        // counting unverified approximation matches for this dim.
        if ((acceptDocs == null || acceptDocs.get(docID))
            && (dc.twoPhase == null || dc.twoPhase.matches())) {
          int slot = docID & MASK;

          if (docIDs[slot] != docID) {
            // Dim 0 did not match this doc, so dim 0 is the (only allowed) missing dim.
            // Mark slot as valid:
            seen.set(slot);
            docIDs[slot] = docID;
            missingDims[slot] = 0;
            counts[slot] = 1;
          } else {
            // TODO: single-valued dims will always be true
            // below; we could somehow specialize
            if (missingDims[slot] >= 1) {
              // Dim 0 matched as well: nothing missing so far.
              missingDims[slot] = 2;
              counts[slot] = 2;
            } else {
              counts[slot] = 1;
            }
          }
        }

        docID = dc.approximation.nextDoc();
      }

      // After this we can "upgrade" to conjunction, because
      // any doc not seen by either dim 0 or dim 1 cannot be
      // a hit or a near miss:

      // Fold in baseScorer, using advance:
      int filledCount = 0;
      int slot0 = 0;
      while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != DocIdSetIterator.NO_MORE_DOCS) {
        int ddDocID = docIDs[slot0];
        assert ddDocID != -1;

        int baseDocID = baseScorer.docID();
        if (baseDocID < ddDocID) {
          baseDocID = baseScorer.advance(ddDocID);
        }
        if (baseDocID == ddDocID) {
          // Base query matches too: keep the slot and count the base match.
          scores[slot0] = baseScorer.score();
          filledSlots[filledCount++] = slot0;
          counts[slot0]++;
        } else {
          // Base query rejects this doc: invalidate the slot so later dims ignore it.
          docIDs[slot0] = -1;

          // TODO: we could jump slot0 forward to the
          // baseDocID ... but we'd need to set docIDs for
          // intervening slots to -1
        }
        slot0++;
      }
      seen.clear(0, CHUNK);

      // Nothing survived the base query in this window: skip the remaining dims.
      if (filledCount == 0) {
        if (nextChunkStart >= maxDoc) {
          break;
        }
        nextChunkStart += CHUNK;
        continue;
      }

      // TODO: factor this out & share w/ union scorer,
      // except we start from dim=2 instead:
      for (int dim = 2; dim < numDims; dim++) {
        dc = dims[dim];
        docID = dc.approximation.docID();
        while (docID < nextChunkStart) {
          int slot = docID & MASK;
          // counts[slot] >= dim means the doc has missed at most one dim so far.
          if (docIDs[slot] == docID
              && counts[slot] >= dim
              && (dc.twoPhase == null || dc.twoPhase.matches())) {
            // TODO: single-valued dims will always be true
            // below; we could somehow specialize
            if (missingDims[slot] >= dim) {
              // Every earlier dim matched too, so no dim is missing yet.
              missingDims[slot] = dim + 1;
              counts[slot] = dim + 2;
            } else {
              // One earlier dim was missed; missingDims keeps pointing at it.
              counts[slot] = dim + 1;
            }
          }

          // TODO: sometimes use advance?
          docID = dc.approximation.nextDoc();
        }
      }

      // Collect:
      for (int i = 0; i < filledCount; i++) {
        int slot = filledSlots[i];
        collectDocID = docIDs[slot];
        collectScore = scores[slot];
        if (counts[slot] == 1 + numDims) {
          // Base query plus every dim matched.
          collectHit(collector, dims);
        } else if (counts[slot] == numDims) {
          // Exactly one dim missed; route to that dim's sideways collector.
          collectNearMiss(dims[missingDims[slot]].sidewaysLeafCollector);
        }
      }

      if (nextChunkStart >= maxDoc) {
        break;
      }

      nextChunkStart += CHUNK;
    }
  }