public void testNegativeScores() throws Exception {

    // The Top*Collectors previously filtered out documents with scores <= 0.
    // That behavior has changed; this test verifies that when
    // PositiveScoresOnlyCollector wraps one of these collectors, documents
    // with scores <= 0 are indeed filtered out.

    int numPositiveScores = 0;
    for (int i = 0; i < scores.length; i++) {
      if (scores[i] > 0) {
        ++numPositiveScores;
      }
    }

    Scorer s = new SimpleScorer();
    TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.create(scores.length, true);
    Collector c = new PositiveScoresOnlyCollector(tdc);
    c.setScorer(s);
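    // The doc id passed to collect() below is irrelevant: the fixture's
    // SimpleScorer steps through scores[] and reports one score per nextDoc().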
    while (s.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      c.collect(0);
    }
    TopDocs td = tdc.topDocs();
    ScoreDoc[] sd = td.scoreDocs;
    assertEquals(numPositiveScores, td.totalHits);
    for (int i = 0; i < sd.length; i++) {
      assertTrue("only positive scores should return: " + sd[i].score, sd[i].score > 0);
    }
  }
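
A minimal usage sketch (not part of the original test), assuming a Lucene 4.x IndexSearcher named searcher and a Query named query; in regular code the wrapper is handed straight to the searcher instead of being driven by hand as above:

  TopScoreDocCollector tdc = TopScoreDocCollector.create(10, true); // keep the top 10 hits
  searcher.search(query, new PositiveScoresOnlyCollector(tdc));     // drops hits with score <= 0
  TopDocs hits = tdc.topDocs();                                     // only positively scored docs remain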
Example #2
  public void queriesTest(String queryText, int[] expDocNrs) throws Exception {
    // System.out.println();
    // System.out.println("Query: " + queryText);

    Query query = makeQuery(queryText);
    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, false);
    searcher.search(query, null, collector);
    ScoreDoc[] hits1 = collector.topDocs().scoreDocs;

    collector = TopScoreDocCollector.create(1000, true);
    searcher.search(query, null, collector);
    ScoreDoc[] hits2 = collector.topDocs().scoreDocs;

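    // bigSearcher is assumed (per the surrounding fixture) to search mulFactor
    // copies of the same index, so its total hit count scales by mulFactor.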
    assertEquals(mulFactor * collector.totalHits, bigSearcher.search(query, 1).totalHits);

    CheckHits.checkHitsQuery(query, hits1, hits2, expDocNrs);
  }
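
The fixture members used above (makeQuery, searcher, bigSearcher, mulFactor) are defined elsewhere in the test class. A hypothetical sketch of makeQuery, assuming the classic QueryParser, a default field named "field", and the Lucene test framework's MockAnalyzer:

  private Query makeQuery(String queryText) throws ParseException {
    // Hypothetical reconstruction; the real fixture supplies its own version,
    // analyzer, and default field.
    QueryParser parser = new QueryParser(Version.LUCENE_47, "field", new MockAnalyzer(random()));
    return parser.parse(queryText);
  }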
Example #3
    @Override
    public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
        throws IOException {
      final String field;
      final String term;
      if (isParentHit(hitContext.hit())) {
        field = ParentFieldMapper.NAME;
        term = Uid.createUid(hitContext.hit().type(), hitContext.hit().id());
      } else if (isChildHit(hitContext.hit())) {
        field = UidFieldMapper.NAME;
        SearchHitField parentField = hitContext.hit().field(ParentFieldMapper.NAME);
        if (parentField != null) {
          term = parentField.getValue();
        } else {
          SingleFieldsVisitor fieldsVisitor = new SingleFieldsVisitor(ParentFieldMapper.NAME);
          hitContext.reader().document(hitContext.docId(), fieldsVisitor);
          if (fieldsVisitor.fields().isEmpty()) {
            return Lucene.EMPTY_TOP_DOCS;
          }
          term = (String) fieldsVisitor.fields().get(ParentFieldMapper.NAME).get(0);
        }
      } else {
        return Lucene.EMPTY_TOP_DOCS;
      }
      // Only include docs that have the current hit as parent
      Filter filter = new TermFilter(new Term(field, term));
      // Only include docs that have this inner hits type
      Filter typeFilter = documentMapper.typeFilter();

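      // size() == 0 means the inner hits request only needs the total count,
      // so a TotalHitCountCollector avoids materializing any hits.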
      if (size() == 0) {
        TotalHitCountCollector collector = new TotalHitCountCollector();
        context
            .searcher()
            .search(
                new XFilteredQuery(query, new AndFilter(Arrays.asList(filter, typeFilter))),
                collector);
        return new TopDocs(collector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
      } else {
        int topN = from() + size();
        TopDocsCollector<?> topDocsCollector;
        if (sort() != null) {
          topDocsCollector =
              TopFieldCollector.create(sort(), topN, true, trackScores(), trackScores(), false);
        } else {
          topDocsCollector = TopScoreDocCollector.create(topN, false);
        }
        context
            .searcher()
            .search(
                new XFilteredQuery(query, new AndFilter(Arrays.asList(filter, typeFilter))),
                topDocsCollector);
        return topDocsCollector.topDocs(from(), size());
      }
    }
Example #4
    @Override
    public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
        throws IOException {
      Filter rawParentFilter;
      if (parentObjectMapper == null) {
        rawParentFilter = NonNestedDocsFilter.INSTANCE;
      } else {
        rawParentFilter = parentObjectMapper.nestedTypeFilter();
      }
      FixedBitSetFilter parentFilter =
          context.fixedBitSetFilterCache().getFixedBitSetFilter(rawParentFilter);
      Filter childFilter = context.filterCache().cache(childObjectMapper.nestedTypeFilter());
      Query q =
          new XFilteredQuery(
              query, new NestedChildrenFilter(parentFilter, childFilter, hitContext));

      if (size() == 0) {
        TotalHitCountCollector collector = new TotalHitCountCollector();
        context.searcher().search(q, collector);
        return new TopDocs(collector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
      } else {
        int topN = from() + size();
        TopDocsCollector<?> topDocsCollector;
        if (sort() != null) {
          try {
            topDocsCollector =
                TopFieldCollector.create(sort(), topN, true, trackScores(), trackScores(), true);
          } catch (IOException e) {
            throw ExceptionsHelper.convertToElastic(e);
          }
        } else {
          topDocsCollector = TopScoreDocCollector.create(topN, true);
        }
        context.searcher().search(q, topDocsCollector);
        return topDocsCollector.topDocs(from(), size());
      }
    }
Example #5
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();

    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>());
          }
          if (!markParentAsDeleted && !filterMe) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));
    indexWriter.commit();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates =
            RandomInts.randomIntBetween(
                random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5));
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      // leave min/max set to 0 half the time
      int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110);
      int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110);

      QueryBuilder queryBuilder =
          hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue)))
              .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
              .minChildren(minChildren)
              .maxChildren(maxChildren)
              .setShortCircuitCutoff(shortCircuitParentDocSet);
      // Wrapping in a FilteredQuery exercises Scorer#advance(..) and also keeps
      // the Weight#scorer from receiving live docs as acceptedDocs.
      queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
      Query query = parseQuery(queryBuilder);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits);
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()];
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            int count = entry.getValue().elementsCount;
            if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) {
              TermsEnum.SeekStatus seekStatus =
                  termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
              if (seekStatus == TermsEnum.SeekStatus.FOUND) {
                docsEnum =
                    termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                expectedResult.set(docsEnum.nextDoc());
                scores[docsEnum.docID()] = new FloatArrayList(entry.getValue());
              } else if (seekStatus == TermsEnum.SeekStatus.END) {
                break;
              }
            }
          }
        }
        MockScorer mockScorer = new MockScorer(scoreType);
        final LeafCollector leafCollector =
            expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
        leafCollector.setScorer(mockScorer);
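        // Replay the expected parent docs in increasing doc id order, handing
        // each parent's child scores to the mock scorer before collecting it.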
        for (int doc = expectedResult.nextSetBit(0);
            doc < slowLeafReader.maxDoc();
            doc =
                doc + 1 >= expectedResult.length()
                    ? DocIdSetIterator.NO_MORE_DOCS
                    : expectedResult.nextSetBit(doc + 1)) {
          mockScorer.scores = scores[doc];
          leafCollector.collect(doc);
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
  /**
   * Accumulates groups for the ToParentBlockJoinQuery specified by its slot.
   *
   * @param slot Search query's slot
   * @param offset Parent docs offset
   * @param maxDocsPerGroup Upper bound on the number of documents per group
   * @param withinGroupOffset Offset within each group of child docs
   * @param withinGroupSort Sort criteria within groups
   * @param fillSortFields Specifies whether to add sort fields or not
   * @return TopGroups for the query specified by slot
   * @throws IOException if there is a low-level I/O error
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private TopGroups<Integer> accumulateGroups(
      int slot,
      int offset,
      int maxDocsPerGroup,
      int withinGroupOffset,
      Sort withinGroupSort,
      boolean fillSortFields)
      throws IOException {
    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
    final FakeScorer fakeScorer = new FakeScorer();

    int totalGroupedHitCount = 0;
    // System.out.println("slot=" + slot);

    for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
      final OneGroup og = sortedGroups[groupIDX];
      final int numChildDocs;
      if (slot == -1 || slot >= og.counts.length) {
        numChildDocs = 0;
      } else {
        numChildDocs = og.counts[slot];
      }

      // Number of documents in group should be bounded to prevent redundant memory allocation
      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
      // System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG="
      // + maxDocsPerGroup);

      // At this point we hold all docs w/ in each group,
      // unsorted; we now sort them:
      final TopDocsCollector<?> collector;
      if (withinGroupSort == null) {
        // System.out.println("sort by score");
        // Sort by score
        if (!trackScores) {
          throw new IllegalArgumentException(
              "cannot sort by relevance within group: trackScores=false");
        }
        collector = TopScoreDocCollector.create(numDocsInGroup, true);
      } else {
        // Sort by fields
        collector =
            TopFieldCollector.create(
                withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
      }

      collector.setScorer(fakeScorer);
      collector.setNextReader(og.readerContext);
      for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
        // System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
        final int doc = og.docs[slot][docIDX];
        fakeScorer.doc = doc;
        if (trackScores) {
          fakeScorer.score = og.scores[slot][docIDX];
        }
        collector.collect(doc);
      }
      totalGroupedHitCount += numChildDocs;

      final Object[] groupSortValues;

      if (fillSortFields) {
        groupSortValues = new Object[comparators.length];
        for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
          groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
        }
      } else {
        groupSortValues = null;
      }

      final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

      groups[groupIDX - offset] =
          new GroupDocs<>(
              og.score,
              topDocs.getMaxScore(),
              numChildDocs,
              topDocs.scoreDocs,
              og.doc,
              groupSortValues);
    }

    return new TopGroups<>(
        new TopGroups<>(
            sort.getSort(),
            withinGroupSort == null ? null : withinGroupSort.getSort(),
            0,
            totalGroupedHitCount,
            groups,
            maxScore),
        totalHitCount);
  }
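
For context, accumulateGroups is private; callers reach it through the public block-join API. A minimal sketch, assuming a Lucene 4.x IndexSearcher named searcher, a child-level Query named childQuery, and a Filter named parentsFilter that identifies parent documents:

  ToParentBlockJoinQuery joinQuery =
      new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
  // trackScores=true so child docs may be sorted by relevance within groups
  ToParentBlockJoinCollector joinCollector =
      new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
  searcher.search(joinQuery, joinCollector);
  // withinGroupSort == null sorts children by score; top 10 docs per group, no offsets
  TopGroups<Integer> groups = joinCollector.getTopGroups(joinQuery, null, 0, 10, 0, true);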
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>());
          }
          if (!markParentAsDeleted) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    for (String childValue : childValues) {
      Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue)));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      Query query =
          new ChildrenQuery(
              "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      MockScorer mockScorer = new MockScorer(scoreType);
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      expectedTopDocsCollector.setScorer(mockScorer);
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
              mockScorer.scores = entry.getValue();
              expectedTopDocsCollector.collect(docsEnum.docID());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexReader.close();
    directory.close();
  }
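
Both testRandom variants rely on a BitSetCollector helper that is not shown here. A hypothetical reconstruction against the Lucene 4.x Collector API, recording every collected doc id in a top-level FixedBitSet:

  static class BitSetCollector extends Collector {
    private final FixedBitSet result;
    private int docBase;

    BitSetCollector(int topLevelMaxDoc) {
      this.result = new FixedBitSet(topLevelMaxDoc);
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      // scores are ignored; only matching doc ids are recorded
    }

    @Override
    public void collect(int doc) throws IOException {
      result.set(docBase + doc); // rebase the segment-local doc id
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
      docBase = context.docBase;
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return true;
    }

    FixedBitSet getResult() {
      return result;
    }
  }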