Example #1
 public static Map<String, Integer> termFrequencies(
     IndexSearcher indexSearcher,
     Query documentFilterQuery,
     String fieldName,
     String propName,
     String altName) {
   try {
     String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
     Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
     Map<String, Integer> freq = new HashMap<>();
     IndexReader indexReader = indexSearcher.getIndexReader();
     for (LeafReaderContext arc : indexReader.leaves()) {
       if (weight == null) throw new RuntimeException("weight == null");
       if (arc == null) throw new RuntimeException("arc == null");
       if (arc.reader() == null) throw new RuntimeException("arc.reader() == null");
       Scorer scorer = weight.scorer(arc, arc.reader().getLiveDocs());
       if (scorer != null) {
         while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
           getFrequenciesFromTermVector(
               indexReader, scorer.docID() + arc.docBase, luceneField, freq);
         }
       }
     }
     return freq;
   } catch (IOException e) {
     throw ExUtil.wrapRuntimeException(e);
   }
 }
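A minimal usage sketch for the method above; the index path, filter query, and field/property names are hypothetical, and the calls assume the same Lucene API generation (createNormalizedWeight, LeafReaderContext) used in the snippet:

 // Hypothetical call site for termFrequencies(); names and paths are illustrative only.
 try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
   IndexSearcher searcher = new IndexSearcher(reader);
   Query filter = new TermQuery(new Term("docType", "article"));
   // Count how often each value of the "word" property occurs in the matching documents:
   Map<String, Integer> freq = termFrequencies(searcher, filter, "contents", "word", null);
   freq.forEach((term, count) -> System.out.println(term + " -> " + count));
 }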
  /**
    * Used when the base query is highly constraining relative to the drill-down dimensions, or when
    * the docs must be scored at once (i.e., like BooleanScorer2, not BooleanScorer). In this case we
    * just .next() on the base scorer and .advance() on the dimension filters.
   */
  private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims)
      throws IOException {
    // if (DEBUG) {
    //  System.out.println("  doQueryFirstScoring");
    // }
    int docID = baseScorer.docID();

    nextDoc:
    while (docID != PostingsEnum.NO_MORE_DOCS) {
      if (acceptDocs != null && acceptDocs.get(docID) == false) {
        docID = baseScorer.nextDoc();
        continue;
      }
      LeafCollector failedCollector = null;
      for (DocsAndCost dim : dims) {
        // TODO: should we sort this 2nd dimension of
        // docsEnums from most frequent to least?
        if (dim.approximation.docID() < docID) {
          dim.approximation.advance(docID);
        }

        boolean matches = false;
        if (dim.approximation.docID() == docID) {
          if (dim.twoPhase == null) {
            matches = true;
          } else {
            matches = dim.twoPhase.matches();
          }
        }

        if (matches == false) {
          if (failedCollector != null) {
            // More than one dim fails on this document, so
            // it's neither a hit nor a near-miss; move to
            // next doc:
            docID = baseScorer.nextDoc();
            continue nextDoc;
          } else {
            failedCollector = dim.sidewaysLeafCollector;
          }
        }
      }

      collectDocID = docID;

      // TODO: we could score on demand instead since we are
      // doc-at-a-time (DAAT) here:
      collectScore = baseScorer.score();

      if (failedCollector == null) {
        // Hit passed all filters, so it's "real":
        collectHit(collector, dims);
      } else {
        // Hit missed exactly one filter:
        collectNearMiss(failedCollector);
      }

      docID = baseScorer.nextDoc();
    }
  }
  private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(
      SearchContext context,
      int nestedSubDocId,
      LeafReaderContext subReaderContext,
      DocumentMapper documentMapper,
      ObjectMapper nestedObjectMapper)
      throws IOException {
    int currentParent = nestedSubDocId;
    ObjectMapper nestedParentObjectMapper;
    ObjectMapper current = nestedObjectMapper;
    String originalName = nestedObjectMapper.name();
    InternalSearchHit.InternalNestedIdentity nestedIdentity = null;
    do {
      Query parentFilter;
      nestedParentObjectMapper = documentMapper.findParentObjectMapper(current);
      if (nestedParentObjectMapper != null) {
        if (nestedParentObjectMapper.nested().isNested() == false) {
          current = nestedParentObjectMapper;
          continue;
        }
        parentFilter = nestedParentObjectMapper.nestedTypeFilter();
      } else {
        parentFilter = Queries.newNonNestedFilter();
      }

      Query childFilter = nestedObjectMapper.nestedTypeFilter();
      if (childFilter == null) {
        current = nestedParentObjectMapper;
        continue;
      }
      final Weight childWeight = context.searcher().createNormalizedWeight(childFilter, false);
      Scorer childScorer = childWeight.scorer(subReaderContext);
      if (childScorer == null) {
        current = nestedParentObjectMapper;
        continue;
      }
      DocIdSetIterator childIter = childScorer.iterator();

      BitSet parentBits =
          context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext);

      int offset = 0;
      int nextParent = parentBits.nextSetBit(currentParent);
      for (int docId = childIter.advance(currentParent + 1);
          docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
          docId = childIter.nextDoc()) {
        offset++;
      }
      currentParent = nextParent;
      current = nestedObjectMapper = nestedParentObjectMapper;
      int currentPrefix = current == null ? 0 : current.name().length() + 1;
      nestedIdentity =
          new InternalSearchHit.InternalNestedIdentity(
              originalName.substring(currentPrefix), offset, nestedIdentity);
      if (current != null) {
        originalName = current.name();
      }
    } while (current != null);
    return nestedIdentity;
  }
  @Override
  public void setScorer(Scorer scorer) {
    // System.out.println("C.setScorer scorer=" + scorer);
    // Since we invoke .score(), and the comparators likely
    // do as well, cache it so it's only "really" computed
    // once:
    this.scorer = new ScoreCachingWrappingScorer(scorer);
    for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
      comparators[compIDX].setScorer(this.scorer);
    }
    Arrays.fill(joinScorers, null);

    Queue<Scorer> queue = new LinkedList<>();
    // System.out.println("\nqueue: add top scorer=" + scorer);
    queue.add(scorer);
    while ((scorer = queue.poll()) != null) {
      // System.out.println("  poll: " + scorer + "; " + scorer.getWeight().getQuery());
      if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) {
        enroll(
            (ToParentBlockJoinQuery) scorer.getWeight().getQuery(),
            (ToParentBlockJoinQuery.BlockJoinScorer) scorer);
      }

      for (ChildScorer sub : scorer.getChildren()) {
        // System.out.println("  add sub: " + sub.child + "; " + sub.child.getWeight().getQuery());
        queue.add(sub.child);
      }
    }
  }
  @Override
  public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
    Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
    if (scorer != null) {
      int newDoc = scorer.advance(doc);
      if (newDoc == doc) {
        float freq = scorer.freq();
        SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
        ComplexExplanation result = new ComplexExplanation();
        result.setDescription(
            "weight("
                + getQuery()
                + " in "
                + doc
                + ") ["
                + similarity.getClass().getSimpleName()
                + "], result of:");
        Explanation scoreExplanation =
            docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
        result.addDetail(scoreExplanation);
        result.setValue(scoreExplanation.getValue());
        result.setMatch(true);
        return result;
      }
    }

    return new ComplexExplanation(false, 0.0f, "no matching term");
  }
    // NOTE: acceptDocs applies (and is checked) only in the
    // parent document space
    @Override
    public Scorer scorer(LeafReaderContext readerContext) throws IOException {

      final Scorer childScorer = childWeight.scorer(readerContext);
      if (childScorer == null) {
        // No matches
        return null;
      }

      final int firstChildDoc = childScorer.iterator().nextDoc();
      if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
        // No matches
        return null;
      }

      // NOTE: this does not take accept docs into account; the responsibility
      // for not matching deleted docs is on the scorer
      final BitSet parents = parentsFilter.getBitSet(readerContext);

      if (parents == null) {
        // No matches
        return null;
      }

      return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
    }
 /*(non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
 @Override
 public float score() throws IOException {
   for (int i = 0; i < valSrcScorers.length; i++) {
     vScores[i] = valSrcScorers[i].score();
   }
   return qWeight
       * provider.customScore(subQueryScorer.docID(), subQueryScorer.score(), vScores);
 }
  /** Replay the wrapped collector, but only on a selection of buckets. */
  @Override
  public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
    if (!finished) {
      throw new IllegalStateException(
          "Cannot replay yet, collection is not finished: postCollect() has not been called");
    }
    if (this.selectedBuckets != null) {
      throw new IllegalStateException("Already been replayed");
    }

    final LongHash hash = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
    for (long bucket : selectedBuckets) {
      hash.add(bucket);
    }
    this.selectedBuckets = hash;

    collector.preCollection();
    boolean needsScores = collector.needsScores();
    Weight weight = null;
    if (needsScores) {
      weight =
          aggContext
              .searchContext()
              .searcher()
              .createNormalizedWeight(aggContext.searchContext().query(), true);
    }
    for (Entry entry : entries) {
      final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
      DocIdSetIterator docIt = null;
      if (needsScores && entry.docDeltas.size() > 0) {
        Scorer scorer = weight.scorer(entry.context);
        // We don't need to check if the scorer is null
        // since we are sure that there are documents to replay (entry.docDeltas is not empty).
        docIt = scorer.iterator();
        leafCollector.setScorer(scorer);
      }
      final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
      final PackedLongValues.Iterator buckets = entry.buckets.iterator();
      int doc = 0;
      for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
        doc += docDeltaIterator.next();
        final long bucket = buckets.next();
        final long rebasedBucket = hash.find(bucket);
        if (rebasedBucket != -1) {
          if (needsScores) {
            if (docIt.docID() < doc) {
              docIt.advance(doc);
            }
            // aggregations should only be replayed on matching documents
            assert docIt.docID() == doc;
          }
          leafCollector.collect(doc, rebasedBucket);
        }
      }
    }

    collector.postCollection();
  }
    @Override
    public float score() throws IOException {
      float score = qWeight * scorer.score() * vals.floatVal(scorer.docID());

      // Current Lucene priority queues can't handle NaN and -Infinity, so
      // map to -Float.MAX_VALUE. This conditional handles both -infinity
      // and NaN since comparisons with NaN are always false.
      return score > Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
    }
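A quick illustration of the comment above: any comparison involving NaN evaluates to false, so the single "score > Float.NEGATIVE_INFINITY" test maps both NaN and negative infinity to -Float.MAX_VALUE while leaving ordinary scores untouched.

    float nan = Float.NaN;
    System.out.println(nan > Float.NEGATIVE_INFINITY);                     // false -> mapped to -Float.MAX_VALUE
    System.out.println(Float.NEGATIVE_INFINITY > Float.NEGATIVE_INFINITY); // false -> mapped to -Float.MAX_VALUE
    System.out.println(0.5f > Float.NEGATIVE_INFINITY);                    // true  -> ordinary score passes through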
Example #10
 protected AugmentScorer(
     BoboIndexReader reader, Scorer innerScorer, ScoreAugmentFunction func, JSONObject jsonParms)
     throws IOException {
   super(innerScorer.getSimilarity());
   _innerScorer = innerScorer;
   _func = func;
   _func.initializeReader(reader, jsonParms);
 }
Example #11
    @Override
    public void collect(int doc) throws IOException {
      if (typeCache == null) {
        return;
      }

      HashedBytesArray parentUid = typeCache.idByDoc(doc);
      uidToScore.put(parentUid, scorer.score());
    }
 @Override
 public int advance(int target) throws IOException {
   int doc = subQueryScorer.advance(target);
   if (doc != NO_MORE_DOCS) {
     for (int i = 0; i < valSrcScorers.length; i++) {
       valSrcScorers[i].advance(doc);
     }
   }
   return doc;
 }
  /**
   * Get a list of all Spans made available from the passed-in Scorer
   *
   * @param scorer the scorer to extract spans from
   * @param errorOnNoSpans if true, throw an error if no Spans can be extracted from the Scorer or
   *     any of its children
   * @return a List of Spans
   */
  public static List<Spans> getSpans(Scorer scorer, boolean errorOnNoSpans) {

    List<Spans> spans = new ArrayList<>();
    if (scorer instanceof Spans) {
      spans.add((Spans) scorer);
      return spans;
    }

    Collection<Scorer.ChildScorer> children = scorer.getChildren();
    if (errorOnNoSpans && children.size() == 0)
      throw new RuntimeException(
          "Couldn't extract SpanScorer from " + scorer.getClass().getCanonicalName());

    for (Scorer.ChildScorer child : children) {
      spans.addAll(getSpans(child.child, errorOnNoSpans));
    }

    return spans;
  }
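A hedged usage sketch for getSpans; the query, field names, and the searcher variable are assumptions, and the weight is built with the same createNormalizedWeight call that appears elsewhere in these examples:

 // Hypothetical: collect the Spans reachable from a SpanNearQuery scorer, per leaf.
 SpanNearQuery query = new SpanNearQuery(
     new SpanQuery[] {
       new SpanTermQuery(new Term("body", "quick")),
       new SpanTermQuery(new Term("body", "fox"))
     },
     1, true);
 Weight weight = searcher.createNormalizedWeight(query, false);
 for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
   Scorer scorer = weight.scorer(leaf);
   if (scorer == null) {
     continue; // no matches in this segment
   }
   List<Spans> allSpans = getSpans(scorer, true);
   // allSpans now holds every Spans instance found in the scorer tree
 }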
 DocsAndCost(Scorer scorer, Collector sidewaysCollector) {
   final TwoPhaseIterator twoPhase = scorer.asTwoPhaseIterator();
   if (twoPhase == null) {
     this.approximation = scorer;
     this.twoPhase = null;
   } else {
     this.approximation = twoPhase.approximation();
     this.twoPhase = twoPhase;
   }
   this.sidewaysCollector = sidewaysCollector;
 }
 @Override
 public void collect(int doc) throws IOException {
   if (counter >= from) {
     docs.add(new ScoreDoc(docBase + doc, trackScores ? scorer.score() : 0f));
   }
   readerState.count++;
   counter++;
   if (counter >= to) {
     throw StopCollectingException;
   }
 }
 @Override
 protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
     throws IOException {
   for (LeafReaderContext ctx : leaves) { // search each subreader
     // we force the use of Scorer (not BulkScorer) to make sure
     // that the scorer passed to LeafCollector.setScorer supports
     // Scorer.getChildren
     Scorer scorer = weight.scorer(ctx);
     if (scorer != null) {
       final LeafCollector leafCollector = collector.getLeafCollector(ctx);
       leafCollector.setScorer(scorer);
       final Bits liveDocs = ctx.reader().getLiveDocs();
       for (int doc = scorer.nextDoc();
           doc != DocIdSetIterator.NO_MORE_DOCS;
           doc = scorer.nextDoc()) {
         if (liveDocs == null || liveDocs.get(doc)) {
           leafCollector.collect(doc);
         }
       }
     }
   }
 }
    @Override
    public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
      Scorer scorer = scorer(context, context.reader().getLiveDocs());
      if (scorer != null) {
        int newDoc = scorer.advance(doc);
        if (newDoc == doc) {
          float score = scorer.score();
          ComplexExplanation result = new ComplexExplanation();
          result.setDescription("ImageHashLimitQuery, product of:");
          result.setValue(score);
          if (getBoost() != 1.0f) {
            result.addDetail(new Explanation(getBoost(), "boost"));
            score = score / getBoost();
          }
          result.addDetail(new Explanation(score, "image score (1/distance)"));
          result.setMatch(true);
          return result;
        }
      }

      return new ComplexExplanation(false, 0.0f, "no matching term");
    }
  /**
   * Collect all Spans extracted from a Scorer using a SpanCollector
   *
   * @param scorer the scorer to extract Spans from
   * @param collector the SpanCollector
   * @param errorOnNoSpans if true, throw an error if no Spans can be extracted from the Scorer or
   *     any of its children
   * @throws IOException on error
   */
  public static void collect(Scorer scorer, SpanCollector collector, boolean errorOnNoSpans)
      throws IOException {

    List<Spans> allSpans = getSpans(scorer, errorOnNoSpans);
    int doc = scorer.docID();

    for (Spans spans : allSpans) {
      int spanDoc = spans.docID();
      // if the Scorer advances lazily, then not all of its subspans may be on
      // the correct document
      if (spanDoc == doc || (spanDoc < doc && spans.advance(doc) == doc)) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          spans.collect(collector);
        }
      }
    }
  }
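A minimal sketch of a SpanCollector to hand to the utility above, assuming the Lucene 5.x/6.x SpanCollector contract (collectLeaf/reset); the scorer is assumed to already be positioned on a matching document:

   // Hypothetical collector that simply counts every collected posting.
   final int[] collected = new int[1];
   SpanCollector counter = new SpanCollector() {
     @Override
     public void collectLeaf(PostingsEnum postings, int position, Term term) {
       collected[0]++;
     }

     @Override
     public void reset() {
       collected[0] = 0;
     }
   };
   collect(scorer, counter, true); // 'scorer' is an already-positioned Scorer from a span query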
Example #19
 @Override
 public void collect(int doc) throws IOException {
   if (values != null) {
     long ord = ordinals.getOrd(doc);
     long parentIdx = parentIdsIndex.get(ord);
     if (parentIdx < 0) {
       final BytesRef bytes = values.getValueByOrd(ord);
       final int hash = values.currentValueHash();
       parentIdx = parentIds.add(bytes, hash);
       if (parentIdx < 0) {
         parentIdx = -parentIdx - 1;
         doScore(parentIdx);
       } else {
         scores = bigArrays.grow(scores, parentIdx + 1);
         scores.set(parentIdx, scorer.score());
       }
       parentIdsIndex.set(ord, parentIdx);
     } else {
       doScore(parentIdx);
     }
   }
 }
Example #20
 @Override
 protected void collect(int doc, HashedBytesArray parentUid) throws IOException {
   float currentScore = scorer.score();
   switch (scoreType) {
     case SUM:
       uidToScore.addTo(parentUid, currentScore);
       break;
     case MAX:
       if (uidToScore.containsKey(parentUid)) {
         float previousScore = uidToScore.lget();
         if (currentScore > previousScore) {
           uidToScore.lset(currentScore);
         }
       } else {
         uidToScore.put(parentUid, currentScore);
       }
       break;
     case AVG:
       assert false : "AVG has its own collector";
     default:
       assert false : "Are we missing a score type here? -- " + scoreType;
       break;
   }
 }
  @Override
  public int score(LeafCollector collector, Bits acceptDocs, int min, int maxDoc)
      throws IOException {
    if (min != 0) {
      throw new IllegalArgumentException("min must be 0, got " + min);
    }
    if (maxDoc != Integer.MAX_VALUE) {
      throw new IllegalArgumentException("maxDoc must be Integer.MAX_VALUE");
    }
    // if (DEBUG) {
    //  System.out.println("\nscore: reader=" + context.reader());
    // }
    // System.out.println("score r=" + context.reader());
    FakeScorer scorer = new FakeScorer();
    collector.setScorer(scorer);
    if (drillDownCollector != null) {
      drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
      drillDownLeafCollector.setScorer(scorer);
    } else {
      drillDownLeafCollector = null;
    }
    for (DocsAndCost dim : dims) {
      dim.sidewaysLeafCollector = dim.sidewaysCollector.getLeafCollector(context);
      dim.sidewaysLeafCollector.setScorer(scorer);
    }

    // TODO: if we ever allow null baseScorer ... it will
    // mean we DO score docs out of order ... hmm, or if we
     // change up the order of the conjunctions below
    assert baseScorer != null;

     // some scorers, e.g. ReqExclScorer, can hit NPE if cost is called after nextDoc
    long baseQueryCost = baseScorer.cost();

    final int numDims = dims.length;

    long drillDownCost = 0;
    for (int dim = 0; dim < numDims; dim++) {
      drillDownCost += dims[dim].approximation.cost();
    }

    long drillDownAdvancedCost = 0;
    if (numDims > 1) {
      drillDownAdvancedCost = dims[1].approximation.cost();
    }

    // Position all scorers to their first matching doc:
    baseScorer.nextDoc();
    for (DocsAndCost dim : dims) {
      dim.approximation.nextDoc();
    }

    /*
    System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
    System.out.println("  maxDoc=" + context.reader().maxDoc());
    System.out.println("  maxCost=" + maxCost);
    System.out.println("  dims[0].freq=" + dims[0].freq);
    if (numDims > 1) {
      System.out.println("  dims[1].freq=" + dims[1].freq);
    }
    */

    if (scoreSubDocsAtOnce || baseQueryCost < drillDownCost / 10) {
      // System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length
      // + " bits.length=" + bits.length);
      doQueryFirstScoring(acceptDocs, collector, dims);
    } else if (numDims > 1 && drillDownAdvancedCost < baseQueryCost / 10) {
      // System.out.println("drillDownAdvance");
      doDrillDownAdvanceScoring(acceptDocs, collector, dims);
    } else {
      // System.out.println("union");
      doUnionScoring(acceptDocs, collector, dims);
    }

    return Integer.MAX_VALUE;
  }
  @Override
  public void collect(int parentDoc) throws IOException {
    // System.out.println("\nC parentDoc=" + parentDoc);
    totalHitCount++;

    float score = Float.NaN;

    if (trackMaxScore) {
      score = scorer.score();
      maxScore = Math.max(maxScore, score);
    }

    // TODO: we could sweep all joinScorers here and
    // aggregate total child hit count, so we can fill this
    // in getTopGroups (we wire it to 0 now)

    if (queueFull) {
      // System.out.println("  queueFull");
      // Fastmatch: return if this hit is not competitive
      for (int i = 0; ; i++) {
        final int c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
        if (c < 0) {
          // Definitely not competitive.
          // System.out.println("    skip");
          return;
        } else if (c > 0) {
          // Definitely competitive.
          break;
        } else if (i == compEnd) {
          // Here c=0. If we're at the last comparator, this doc is not
          // competitive, since docs are visited in doc Id order, which means
          // this doc cannot compete with any other document in the queue.
          // System.out.println("    skip");
          return;
        }
      }

      // System.out.println("    competes!  doc=" + (docBase + parentDoc));

      // This hit is competitive - replace bottom element in queue & adjustTop
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].copy(bottom.slot, parentDoc);
      }
      if (!trackMaxScore && trackScores) {
        score = scorer.score();
      }
      bottom.doc = docBase + parentDoc;
      bottom.readerContext = currentReaderContext;
      bottom.score = score;
      copyGroups(bottom);
      bottom = queue.updateTop();

      for (int i = 0; i < comparators.length; i++) {
        comparators[i].setBottom(bottom.slot);
      }
    } else {
      // Startup transient: queue is not yet full:
      final int comparatorSlot = totalHitCount - 1;

      // Copy hit into queue
      for (int i = 0; i < comparators.length; i++) {
        comparators[i].copy(comparatorSlot, parentDoc);
      }
      // System.out.println("  startup: new OG doc=" + (docBase+parentDoc));
      if (!trackMaxScore && trackScores) {
        score = scorer.score();
      }
      final OneGroup og =
          new OneGroup(comparatorSlot, docBase + parentDoc, score, joinScorers.length, trackScores);
      og.readerContext = currentReaderContext;
      copyGroups(og);
      bottom = queue.add(og);
      queueFull = totalHitCount == numParentHits;
      if (queueFull) {
        // End of startup transient: queue just filled up:
        for (int i = 0; i < comparators.length; i++) {
          comparators[i].setBottom(bottom.slot);
        }
      }
    }
  }
 /** not a direct test of NearSpans, but a demonstration of how/when this causes problems */
 public void testSpanNearScorerSkipTo1() throws Exception {
   SpanNearQuery q = makeQuery();
   Weight w = q.weight(searcher);
   Scorer s = w.scorer(searcher.getIndexReader(), true, false);
   assertEquals(1, s.advance(1));
 }
Example #24
 @Override
 public void collect(int doc) throws IOException {
   if (scorer.score() > minScore) {
     count++;
   }
 }
 @Override
 public int docID() {
   return subQueryScorer.docID();
 }
 @Override
 public int nextDoc() throws IOException {
   return scorer.nextDoc();
 }
 @Override
 public int advance(int target) throws IOException {
   return scorer.advance(target);
 }
 @Override
 public int docID() {
   return scorer.docID();
 }
Example #29
 @Override
 public float score() throws IOException {
   return (_func.useInnerScore())
       ? _func.newScore(_innerScorer.score(), _innerScorer.docID())
       : _func.newScore(_innerScorer.docID());
 }
Example #30
 @Override
 public int docID() {
   return _innerScorer.docID();
 }