コード例 #1
0
 /**
  * Truncates {@code docs.scoreDocs} at the first position where the scores have flattened out.
  *
  * <p>Each hit at index {@code j >= 100} is compared with the hit 100 positions before it. As
  * soon as the score gap between the two drops below 0.5% of the first hit's score, the array is
  * replaced by a copy holding only the hits before index {@code j}. Result sets of 100 or fewer
  * hits are never shortened.
  *
  * @param docs search results; its {@code scoreDocs} field is reassigned when pruning happens
  * @throws IOException declared by the signature; no statement in this method raises it directly
  */
 private void pruneSimilar(TopDocs docs) throws IOException {
   final int total = docs.scoreDocs.length;
   if (total == 0) {
     return;
   }
   // Minimum score drop expected across a 100-hit window, relative to the first hit.
   final double minGap = 0.005 * docs.scoreDocs[0].score;
   for (int ahead = 100; ahead < total; ahead++) {
     final float gap = docs.scoreDocs[ahead - 100].score - docs.scoreDocs[ahead].score;
     if (gap < minGap) {
       // Scores stopped changing meaningfully: keep only the hits before this index.
       docs.scoreDocs = ArrayUtils.subarray(docs.scoreDocs, 0, ahead);
       return;
     }
   }
 }
  /**
   * Accumulates groups for the BlockJoinQuery specified by its slot.
   *
   * <p>One {@code GroupDocs} is produced for every entry of {@code sortedGroups} from index
   * {@code offset} onward. The child docs recorded for {@code slot} in each group are replayed
   * through a top-docs collector (ordered by score when {@code withinGroupSort} is {@code null},
   * otherwise by the given sort) so that they come out ordered and bounded in number.
   *
   * @param slot Search query's slot; {@code -1}, or a value at or beyond the length of a group's
   *     {@code counts} array, means that group contributes no child docs
   * @param offset Parent docs offset: index of the first entry of {@code sortedGroups} to include
   * @param maxDocsPerGroup Upper bound on the number of child docs kept per group
   * @param withinGroupOffset Offset within each group of child docs
   * @param withinGroupSort Sort criteria within groups, or {@code null} to sort by score
   * @param fillSortFields Specifies whether to add sort fields or not: when {@code true} each
   *     group also carries the values read from {@code comparators}; the flag is additionally
   *     passed through to {@code TopFieldCollector.create}
   * @return TopGroups for the query specified by slot
   * @throws IOException if there is a low-level I/O error
   * @throws IllegalArgumentException if {@code withinGroupSort} is {@code null} while
   *     {@code trackScores} is {@code false} and there is at least one group to process
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private TopGroups<Integer> accumulateGroups(
      int slot,
      int offset,
      int maxDocsPerGroup,
      int withinGroupOffset,
      Sort withinGroupSort,
      boolean fillSortFields)
      throws IOException {
    // One output slot per parent group starting at `offset`.
    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
    // A single scorer instance is shared by all collectors; its doc/score fields are overwritten
    // right before every collect() call below.
    final FakeScorer fakeScorer = new FakeScorer();

    int totalGroupedHitCount = 0;

    for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
      final OneGroup og = sortedGroups[groupIDX];
      final int numChildDocs;
      // An unknown slot (-1) or a slot this group has no counter for yields an empty group.
      if (slot == -1 || slot >= og.counts.length) {
        numChildDocs = 0;
      } else {
        numChildDocs = og.counts[slot];
      }

      // Number of documents in group should be bounded to prevent redundant memory allocation.
      // The lower bound of 1 keeps the collector size positive even for an empty group
      // (presumably because the collector factories reject a size of 0 -- TODO confirm).
      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));

      // At this point we hold all docs w/ in each group,
      // unsorted; we now sort them:
      final TopDocsCollector<?> collector;
      if (withinGroupSort == null) {
        // Sort by score
        if (!trackScores) {
          throw new IllegalArgumentException(
              "cannot sort by relevance within group: trackScores=false");
        }
        collector = TopScoreDocCollector.create(numDocsInGroup, true);
      } else {
        // Sort by fields
        collector =
            TopFieldCollector.create(
                withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
      }

      // Replay the recorded child docs (and their scores, when tracked) into the collector.
      collector.setScorer(fakeScorer);
      collector.setNextReader(og.readerContext);
      for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
        final int doc = og.docs[slot][docIDX];
        fakeScorer.doc = doc;
        if (trackScores) {
          fakeScorer.score = og.scores[slot][docIDX];
        }
        collector.collect(doc);
      }
      // Counts every child doc of the group, not only the ones kept by the collector.
      totalGroupedHitCount += numChildDocs;

      final Object[] groupSortValues;

      if (fillSortFields) {
        // Read this group's sort value from each comparator, using the group's own slot.
        groupSortValues = new Object[comparators.length];
        for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
          groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
        }
      } else {
        groupSortValues = null;
      }

      // Extract the requested window of ordered child docs for this group.
      final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

      groups[groupIDX - offset] =
          new GroupDocs<>(
              og.score,
              topDocs.getMaxScore(),
              numChildDocs,
              topDocs.scoreDocs,
              og.doc,
              groupSortValues);
    }

    // The inner TopGroups carries the per-group data; the outer one adds totalHitCount to it.
    return new TopGroups<>(
        new TopGroups<>(
            sort.getSort(),
            withinGroupSort == null ? null : withinGroupSort.getSort(),
            0,
            totalGroupedHitCount,
            groups,
            maxScore),
        totalHitCount);
  }
コード例 #3
0
  /**
   * Combines the per-shard top docs into a single ordered array of hits.
   *
   * <p>When {@code optimizeSingleShard} is enabled and either there is exactly one shard result
   * or only one shard returned any hits, the requested window is sliced directly out of that
   * shard's hits and every returned hit is stamped with the shard's index. In all other cases
   * the shard results are ordered with {@code QUERY_RESULT_ORDERING} and merged through
   * {@code TopDocs.merge}.
   *
   * @param scrollSort Whether to ignore the from and sort all hits in each shard result. Only used
   *     for scroll search
   * @param resultsArr Shard result holder
   * @return the selected hits; {@code EMPTY_DOCS} when there are no shard results, or when the
   *     single contributing shard has no hits at or after the requested offset
   * @throws IOException declared by the signature (presumably raised by the merge step)
   */
  public ScoreDoc[] sortDocs(
      boolean scrollSort, AtomicArray<? extends QuerySearchResultProvider> resultsArr)
      throws IOException {
    final List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> shardEntries =
        resultsArr.asList();
    if (shardEntries.isEmpty()) {
      return EMPTY_DOCS;
    }

    if (optimizeSingleShard) {
      QuerySearchResult soleResult = null;
      int soleShardIndex = -1;
      boolean singleSource = false;
      if (shardEntries.size() == 1) {
        singleSource = true;
        soleResult = shardEntries.get(0).value.queryResult();
        soleShardIndex = shardEntries.get(0).index;
      } else {
        // Look for the case where exactly one shard contributed hits.
        for (AtomicArray.Entry<? extends QuerySearchResultProvider> candidate : shardEntries) {
          if (candidate.value.queryResult().topDocs().scoreDocs.length == 0) {
            continue;
          }
          if (soleResult != null) {
            // A second shard with hits: the shortcut does not apply.
            singleSource = false;
            break;
          }
          singleSource = true;
          soleResult = candidate.value.queryResult();
          soleShardIndex = candidate.index;
        }
      }

      if (singleSource) {
        int start = soleResult.from();
        if (scrollSort) {
          start = 0;
        }
        final ScoreDoc[] shardHits = soleResult.topDocs().scoreDocs;
        if (shardHits.length == 0 || start > shardHits.length) {
          return EMPTY_DOCS;
        }

        // Never hand out more hits than remain after the offset.
        final int window = Math.min(soleResult.size(), shardHits.length - start);
        final ScoreDoc[] page = new ScoreDoc[window];
        for (int dst = 0, src = start; dst < window; dst++, src++) {
          final ScoreDoc hit = shardHits[src];
          hit.shardIndex = soleShardIndex;
          page[dst] = hit;
        }
        return page;
      }
    }

    @SuppressWarnings("unchecked")
    final AtomicArray.Entry<? extends QuerySearchResultProvider>[] ordered =
        shardEntries.toArray(new AtomicArray.Entry[shardEntries.size()]);
    Arrays.sort(ordered, QUERY_RESULT_ORDERING);
    final QuerySearchResultProvider head = ordered[0].value;

    // A field-sorted first result dictates the merge sort; otherwise merge without a sort.
    final Sort mergeSort =
        head.queryResult().topDocs() instanceof TopFieldDocs
            ? new Sort(((TopFieldDocs) head.queryResult().topDocs()).fields)
            : null;

    int topN = head.queryResult().size();
    // Sized by the holder's length because each entry's 'index' is its position in resultsArr.
    final TopDocs[] perShard = new TopDocs[resultsArr.length()];
    if (head.includeFetch()) {
      // Query and fetch ran together, so every shard already fetched its own "from".."size"
      // window; widen topN to cover all of them.
      topN = topN * ordered.length;
    }
    for (AtomicArray.Entry<? extends QuerySearchResultProvider> shardEntry : ordered) {
      perShard[shardEntry.index] = shardEntry.value.queryResult().topDocs();
    }

    int from = head.queryResult().from();
    if (scrollSort) {
      from = 0;
    }

    // TopDocs#merge can't deal with null shard TopDocs
    for (int slot = 0; slot < perShard.length; slot++) {
      if (perShard[slot] == null) {
        perShard[slot] = Lucene.EMPTY_TOP_DOCS;
      }
    }
    return TopDocs.merge(mergeSort, from, topN, perShard).scoreDocs;
  }