Ejemplo n.º 1
0
 /**
  * Builds a filter tied to one specific hit.
  *
  * @param parentFilter kept in the {@code parentFilter} field
  * @param childFilter kept in the {@code childFilter} field
  * @param hitContext source of the doc id and the reader captured at construction time
  */
 NestedChildrenFilter(
     FixedBitSetFilter parentFilter, Filter childFilter, FetchSubPhase.HitContext hitContext) {
   // Copy the hit's reader and doc id now, so later changes to hitContext are not seen here.
   this.atomicReader = hitContext.readerContext().reader();
   this.docId = hitContext.docId();
   this.childFilter = childFilter;
   this.parentFilter = parentFilter;
 }
  /**
   * Indexes a random number of document blocks (children first, parent last) and checks that, for
   * every parent document, a {@code NestedChildrenFilter} built for that parent matches exactly
   * the number of documents stored in the parent's {@code num_child_docs} field.
   */
  @Test
  public void testNestedChildrenFilter() throws Exception {
    int numParentDocs = scaledRandomIntBetween(0, 32);
    int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int parentIndex = 0; parentIndex < numParentDocs; parentIndex++) {
      int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
      List<Document> block = new ArrayList<>(numChildDocs + 1);

      // Children are added to the block before their parent.
      for (int remaining = numChildDocs; remaining > 0; remaining--) {
        Document child = new Document();
        child.add(new StringField("type", "child", Field.Store.NO));
        block.add(child);
      }

      // The parent stores how many children it has so the count can be verified later.
      Document parent = new Document();
      parent.add(new StringField("type", "parent", Field.Store.NO));
      parent.add(new IntField("num_child_docs", numChildDocs, Field.Store.YES));
      block.add(parent);

      writer.addDocuments(block);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = new IndexSearcher(reader);
    FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
    TermQuery parentTypeQuery = new TermQuery(new Term("type", "parent"));
    BitDocIdSetFilter parentFilter =
        new BitDocIdSetCachingWrapperFilter(new QueryWrapperFilter(parentTypeQuery));
    TermQuery childTypeQuery = new TermQuery(new Term("type", "child"));
    Filter childFilter = new QueryWrapperFilter(childTypeQuery);

    int checkedParents = 0;
    for (LeafReaderContext leaf : reader.leaves()) {
      DocIdSetIterator parents = parentFilter.getDocIdSet(leaf).iterator();
      int parentDoc;
      while ((parentDoc = parents.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        Number storedCount =
            leaf.reader().document(parentDoc).getField("num_child_docs").numericValue();
        int expectedChildDocs = storedCount.intValue();

        // Point the shared hit context at this parent before building the filter for it.
        hitContext.reset(null, leaf, parentDoc, searcher);
        NestedChildrenFilter nestedChildrenFilter =
            new NestedChildrenFilter(parentFilter, childFilter, hitContext);

        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        ConstantScoreQuery countingQuery = new ConstantScoreQuery(nestedChildrenFilter);
        searcher.search(countingQuery, hitCounter);

        assertThat(hitCounter.getTotalHits(), equalTo(expectedChildDocs));
        checkedParents += 1;
      }
    }

    // Every indexed parent must have been visited exactly once.
    assertThat(checkedParents, equalTo(numParentDocs));
    reader.close();
    dir.close();
  }
Ejemplo n.º 3
0
    /**
     * Runs the inner-hits query restricted to documents related to the given hit.
     *
     * <p>For a parent hit the restriction is a term on {@code ParentFieldMapper.NAME} built from
     * the hit's type and id. For a child hit it is a term on {@code UidFieldMapper.NAME} whose
     * value is the hit's parent field, read from the hit if already loaded and otherwise from the
     * stored document. Any other hit, or a child hit without a parent value, yields
     * {@code Lucene.EMPTY_TOP_DOCS}.
     *
     * @param context search context whose searcher executes the query
     * @param hitContext the hit for which related documents are collected
     * @return only the total hit count when {@code size()} is 0, otherwise the requested
     *     {@code from()}/{@code size()} window of the collected top docs
     * @throws IOException if reading the stored document or searching fails
     */
    @Override
    public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
        throws IOException {
      final String field;
      final String term;
      if (isParentHit(hitContext.hit())) {
        field = ParentFieldMapper.NAME;
        term = Uid.createUid(hitContext.hit().type(), hitContext.hit().id());
      } else if (!isChildHit(hitContext.hit())) {
        // Neither a parent nor a child hit: nothing can be related to it.
        return Lucene.EMPTY_TOP_DOCS;
      } else {
        field = UidFieldMapper.NAME;
        SearchHitField parentField = hitContext.hit().field(ParentFieldMapper.NAME);
        if (parentField == null) {
          // The parent field was not loaded with the hit; read it from the stored document.
          SingleFieldsVisitor fieldsVisitor = new SingleFieldsVisitor(ParentFieldMapper.NAME);
          hitContext.reader().document(hitContext.docId(), fieldsVisitor);
          if (fieldsVisitor.fields().isEmpty()) {
            return Lucene.EMPTY_TOP_DOCS;
          }
          term = (String) fieldsVisitor.fields().get(ParentFieldMapper.NAME).get(0);
        } else {
          term = parentField.getValue();
        }
      }

      // Restrict to docs related to the current hit ...
      Filter relationFilter = new TermFilter(new Term(field, term));
      // ... that are also of this inner hits type.
      Filter typeFilter = documentMapper.typeFilter();
      XFilteredQuery restrictedQuery =
          new XFilteredQuery(query, new AndFilter(Arrays.asList(relationFilter, typeFilter)));

      if (size() != 0) {
        int topN = from() + size();
        TopDocsCollector topDocsCollector =
            sort() == null
                ? TopScoreDocCollector.create(topN, false)
                : TopFieldCollector.create(
                    sort(), topN, true, trackScores(), trackScores(), false);
        context.searcher().search(restrictedQuery, topDocsCollector);
        return topDocsCollector.topDocs(from(), size());
      }

      // No hits requested: only the total count is needed.
      TotalHitCountCollector countCollector = new TotalHitCountCollector();
      context.searcher().search(restrictedQuery, countCollector);
      return new TopDocs(countCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
    }
  /**
   * Highlights one field of one hit using Lucene's fast vector highlighter.
   *
   * <p>State is kept in a {@code HighlighterEntry} stored in {@code hitContext.cache()} under
   * {@code CACHE_KEY}: the rewritten field query (one per {@code requireFieldMatch} setting), the
   * highlighter instance, and one {@code MapperHighlightEntry} (frag list builder + fragments
   * builder) per mapper. Each of these is created the first time it is needed and reused
   * afterwards.
   *
   * <p>If no fragments are produced and {@code noMatchSize} is positive, a single fragment
   * covering offsets {@code 0..noMatchSize} is built with the same fragments builder instead.
   *
   * @param highlighterContext carries the field, its options, the mapper, the query, the search
   *     context and the hit context
   * @return the highlighted fragments for the field, or {@code null} when nothing was produced
   * @throws IllegalArgumentException if {@code canHighlight(mapper)} is false (thrown before the
   *     try block, so it is not wrapped)
   * @throws FetchPhaseExecutionException wrapping any exception raised while highlighting
   */
  @Override
  public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper mapper = highlighterContext.mapper;

    if (canHighlight(mapper) == false) {
      throw new IllegalArgumentException(
          "the field ["
              + highlighterContext.fieldName
              + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
    }

    // Only the "html" encoder option selects HTML encoding; anything else uses the default.
    Encoder encoder =
        field.fieldOptions().encoder().equals("html")
            ? HighlightUtils.Encoders.HTML
            : HighlightUtils.Encoders.DEFAULT;

    // Lazily create the highlighter state in the hit context's cache.
    if (!hitContext.cache().containsKey(CACHE_KEY)) {
      hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
    }
    HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);

    try {
      // The field query is cached separately for requireFieldMatch == true and == false.
      FieldQuery fieldQuery;
      if (field.fieldOptions().requireFieldMatch()) {
        if (cache.fieldMatchFieldQuery == null) {
          /*
           * we use the top level reader to rewrite the query against all readers, which lets us
           * cache it across hits (and across readers...)
           */
          cache.fieldMatchFieldQuery =
              new CustomFieldQuery(
                  highlighterContext.query,
                  hitContext.topLevelReader(),
                  true,
                  field.fieldOptions().requireFieldMatch());
        }
        fieldQuery = cache.fieldMatchFieldQuery;
      } else {
        if (cache.noFieldMatchFieldQuery == null) {
          /*
           * we use the top level reader to rewrite the query against all readers, which lets us
           * cache it across hits (and across readers...)
           */
          cache.noFieldMatchFieldQuery =
              new CustomFieldQuery(
                  highlighterContext.query,
                  hitContext.topLevelReader(),
                  true,
                  field.fieldOptions().requireFieldMatch());
        }
        fieldQuery = cache.noFieldMatchFieldQuery;
      }

      // Builders are created once per mapper and then reused from cache.mappers.
      MapperHighlightEntry entry = cache.mappers.get(mapper);
      if (entry == null) {
        FragListBuilder fragListBuilder;
        BaseFragmentsBuilder fragmentsBuilder;

        // Use the shared default scanner unless the field overrides max scan or boundary chars.
        // NOTE(review): boundaryChars() is compared with != (not equals); if it returns an
        // object/array this is a reference comparison -- confirm that is intended.
        BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER;
        if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
            || field.fieldOptions().boundaryChars()
                != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
          boundaryScanner =
              new SimpleBoundaryScanner(
                  field.fieldOptions().boundaryMaxScan(), field.fieldOptions().boundaryChars());
        }
        // In every branch below: the non-"Source" builder is used only when source is not forced
        // and the field is stored; otherwise the "Source*" builder (which takes the search
        // context) is used.
        boolean forceSource = context.highlight().forceSource(field);
        if (field.fieldOptions().numberOfFragments() == 0) {
          // Zero fragments requested: single frag list builder (see the HACK note further down).
          fragListBuilder = new SingleFragListBuilder();

          if (!forceSource && mapper.fieldType().stored()) {
            fragmentsBuilder =
                new SimpleFragmentsBuilder(
                    mapper,
                    field.fieldOptions().preTags(),
                    field.fieldOptions().postTags(),
                    boundaryScanner);
          } else {
            fragmentsBuilder =
                new SourceSimpleFragmentsBuilder(
                    mapper,
                    context,
                    field.fieldOptions().preTags(),
                    field.fieldOptions().postTags(),
                    boundaryScanner);
          }
        } else {
          // A fragmentOffset of -1 means "not set": use the builder's no-arg constructor.
          fragListBuilder =
              field.fieldOptions().fragmentOffset() == -1
                  ? new SimpleFragListBuilder()
                  : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
          if (field.fieldOptions().scoreOrdered()) {
            if (!forceSource && mapper.fieldType().stored()) {
              fragmentsBuilder =
                  new ScoreOrderFragmentsBuilder(
                      field.fieldOptions().preTags(),
                      field.fieldOptions().postTags(),
                      boundaryScanner);
            } else {
              fragmentsBuilder =
                  new SourceScoreOrderFragmentsBuilder(
                      mapper,
                      context,
                      field.fieldOptions().preTags(),
                      field.fieldOptions().postTags(),
                      boundaryScanner);
            }
          } else {
            if (!forceSource && mapper.fieldType().stored()) {
              fragmentsBuilder =
                  new SimpleFragmentsBuilder(
                      mapper,
                      field.fieldOptions().preTags(),
                      field.fieldOptions().postTags(),
                      boundaryScanner);
            } else {
              fragmentsBuilder =
                  new SourceSimpleFragmentsBuilder(
                      mapper,
                      context,
                      field.fieldOptions().preTags(),
                      field.fieldOptions().postTags(),
                      boundaryScanner);
            }
          }
        }
        fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
        entry = new MapperHighlightEntry();
        entry.fragListBuilder = fragListBuilder;
        entry.fragmentsBuilder = fragmentsBuilder;
        if (cache.fvh == null) {
          // parameters to FVH are not required since:
          // first two booleans are not relevant since they are set on the CustomFieldQuery (phrase
          // and fieldMatch)
          // fragment builders are used explicitly
          cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
        }
        // Note: the highlight filter is only set here, i.e. when a new mapper entry is built.
        CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
        cache.mappers.put(mapper, entry);
      }
      // The phrase limit is applied on every call, not only when the highlighter is created.
      cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

      String[] fragments;

      // a HACK to make highlighter do highlighting, even though its using the single frag list
      // builder
      int numberOfFragments =
          field.fieldOptions().numberOfFragments() == 0
              ? Integer.MAX_VALUE
              : field.fieldOptions().numberOfFragments();
      int fragmentCharSize =
          field.fieldOptions().numberOfFragments() == 0
              ? Integer.MAX_VALUE
              : field.fieldOptions().fragmentCharSize();
      // we highlight against the low level reader and docId, because if we load source, we want to
      // reuse it if possible
      // Only send matched fields if they were requested to save time.
      if (field.fieldOptions().matchedFields() != null
          && !field.fieldOptions().matchedFields().isEmpty()) {
        fragments =
            cache.fvh.getBestFragments(
                fieldQuery,
                hitContext.reader(),
                hitContext.docId(),
                mapper.fieldType().name(),
                field.fieldOptions().matchedFields(),
                fragmentCharSize,
                numberOfFragments,
                entry.fragListBuilder,
                entry.fragmentsBuilder,
                field.fieldOptions().preTags(),
                field.fieldOptions().postTags(),
                encoder);
      } else {
        fragments =
            cache.fvh.getBestFragments(
                fieldQuery,
                hitContext.reader(),
                hitContext.docId(),
                mapper.fieldType().name(),
                fragmentCharSize,
                numberOfFragments,
                entry.fragListBuilder,
                entry.fragmentsBuilder,
                field.fieldOptions().preTags(),
                field.fieldOptions().postTags(),
                encoder);
      }

      if (fragments != null && fragments.length > 0) {
        return new HighlightField(
            highlighterContext.fieldName, Text.convertFromStringArray(fragments));
      }

      // Nothing matched: optionally fall back to the leading noMatchSize characters of the field.
      int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
      if (noMatchSize > 0) {
        // Essentially we just request that a fragment is built from 0 to noMatchSize using the
        // normal fragmentsBuilder
        FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
        fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
        fragments =
            entry.fragmentsBuilder.createFragments(
                hitContext.reader(),
                hitContext.docId(),
                mapper.fieldType().name(),
                fieldFragList,
                1,
                field.fieldOptions().preTags(),
                field.fieldOptions().postTags(),
                encoder);
        if (fragments != null && fragments.length > 0) {
          return new HighlightField(
              highlighterContext.fieldName, Text.convertFromStringArray(fragments));
        }
      }

      // No highlight and no no-match fallback for this field.
      return null;

    } catch (Exception e) {
      // Wrap everything raised during highlighting, keeping the original cause.
      throw new FetchPhaseExecutionException(
          context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
  }