/**
 * Collects frequencies for all terms in the given property field, over all documents matching
 * the filter query, by reading each matching document's term vector.
 */
public static Map<String, Integer> termFrequencies(
    IndexSearcher indexSearcher,
    Query documentFilterQuery,
    String fieldName,
    String propName,
    String altName) {
  try {
    String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
    Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
    Map<String, Integer> freq = new HashMap<>();
    IndexReader indexReader = indexSearcher.getIndexReader();
    for (LeafReaderContext arc : indexReader.leaves()) {
      if (weight == null)
        throw new RuntimeException("weight == null");
      if (arc == null)
        throw new RuntimeException("arc == null");
      if (arc.reader() == null)
        throw new RuntimeException("arc.reader() == null");
      Scorer scorer = weight.scorer(arc, arc.reader().getLiveDocs());
      if (scorer != null) {
        while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          getFrequenciesFromTermVector(
              indexReader, scorer.docID() + arc.docBase, luceneField, freq);
        }
      }
    }
    return freq;
  } catch (IOException e) {
    throw ExUtil.wrapRuntimeException(e);
  }
}
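// Hedged usage sketch (not from the source): a hypothetical call site for termFrequencies().
// The directory variable, the metadata field "author" and the field/property names
// "contents"/"word" are assumptions; they only illustrate the expected arguments.
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
Query documentFilter = new TermQuery(new Term("author", "smith"));
Map<String, Integer> freq = termFrequencies(searcher, documentFilter, "contents", "word", null);
for (Map.Entry<String, Integer> entry : freq.entrySet()) {
  System.out.println(entry.getKey() + "\t" + entry.getValue());
}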
/**
 * Walks up the chain of nested object mappers for the given nested sub-document, computing the
 * document's offset under each parent and chaining the results into a nested identity.
 */
private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(
    SearchContext context,
    int nestedSubDocId,
    LeafReaderContext subReaderContext,
    DocumentMapper documentMapper,
    ObjectMapper nestedObjectMapper)
    throws IOException {
  int currentParent = nestedSubDocId;
  ObjectMapper nestedParentObjectMapper;
  ObjectMapper current = nestedObjectMapper;
  String originalName = nestedObjectMapper.name();
  InternalSearchHit.InternalNestedIdentity nestedIdentity = null;
  do {
    Query parentFilter;
    nestedParentObjectMapper = documentMapper.findParentObjectMapper(current);
    if (nestedParentObjectMapper != null) {
      if (nestedParentObjectMapper.nested().isNested() == false) {
        current = nestedParentObjectMapper;
        continue;
      }
      parentFilter = nestedParentObjectMapper.nestedTypeFilter();
    } else {
      parentFilter = Queries.newNonNestedFilter();
    }
    Query childFilter = nestedObjectMapper.nestedTypeFilter();
    if (childFilter == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    final Weight childWeight = context.searcher().createNormalizedWeight(childFilter, false);
    Scorer childScorer = childWeight.scorer(subReaderContext);
    if (childScorer == null) {
      current = nestedParentObjectMapper;
      continue;
    }
    DocIdSetIterator childIter = childScorer.iterator();
    BitSet parentBits =
        context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext);
    int offset = 0;
    int nextParent = parentBits.nextSetBit(currentParent);
    for (int docId = childIter.advance(currentParent + 1);
        docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
        docId = childIter.nextDoc()) {
      offset++;
    }
    currentParent = nextParent;
    current = nestedObjectMapper = nestedParentObjectMapper;
    int currentPrefix = current == null ? 0 : current.name().length() + 1;
    nestedIdentity =
        new InternalSearchHit.InternalNestedIdentity(
            originalName.substring(currentPrefix), offset, nestedIdentity);
    if (current != null) {
      originalName = current.name();
    }
  } while (current != null);
  return nestedIdentity;
}
/** Replay the wrapped collector, but only on a selection of buckets. */
@Override
public void prepareSelectedBuckets(long... selectedBuckets) throws IOException {
  if (!finished) {
    throw new IllegalStateException(
        "Cannot replay yet, collection is not finished: postCollect() has not been called");
  }
  if (this.selectedBuckets != null) {
    throw new IllegalStateException("Already been replayed");
  }

  final LongHash hash = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE);
  for (long bucket : selectedBuckets) {
    hash.add(bucket);
  }
  this.selectedBuckets = hash;

  collector.preCollection();
  boolean needsScores = collector.needsScores();
  Weight weight = null;
  if (needsScores) {
    weight =
        aggContext
            .searchContext()
            .searcher()
            .createNormalizedWeight(aggContext.searchContext().query(), true);
  }
  for (Entry entry : entries) {
    final LeafBucketCollector leafCollector = collector.getLeafCollector(entry.context);
    DocIdSetIterator docIt = null;
    if (needsScores && entry.docDeltas.size() > 0) {
      Scorer scorer = weight.scorer(entry.context);
      // We don't need to check if the scorer is null
      // since we are sure that there are documents to replay (entry.docDeltas is not empty).
      docIt = scorer.iterator();
      leafCollector.setScorer(scorer);
    }
    final PackedLongValues.Iterator docDeltaIterator = entry.docDeltas.iterator();
    final PackedLongValues.Iterator buckets = entry.buckets.iterator();
    int doc = 0;
    for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
      doc += docDeltaIterator.next();
      final long bucket = buckets.next();
      final long rebasedBucket = hash.find(bucket);
      if (rebasedBucket != -1) {
        if (needsScores) {
          if (docIt.docID() < doc) {
            docIt.advance(doc);
          }
          // aggregations should only be replayed on matching documents
          assert docIt.docID() == doc;
        }
        leafCollector.collect(doc, rebasedBucket);
      }
    }
  }
  collector.postCollection();
}
@Override
public Scorer filteredScorer(LeafReaderContext context, Weight weight, DocIdSet docIdSet)
    throws IOException {
  // CHANGE: If threshold is 0, always pass down the accept docs; don't even pay the price of
  // calling nextDoc...
  final Bits filterAcceptDocs = docIdSet.bits();
  if (threshold == 0) {
    if (filterAcceptDocs != null) {
      return weight.scorer(context, filterAcceptDocs);
    } else {
      return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(
          context, weight, docIdSet);
    }
  }
  // CHANGE: handle "default" value
  if (threshold == -1) {
    // Default value: don't iterate; only apply the filter after the query if it's not a
    // "fast" docIdSet.
    // TODO: is there a way we could avoid creating an iterator here?
    if (filterAcceptDocs != null && DocIdSets.isBroken(docIdSet.iterator())) {
      return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, weight, docIdSet);
    }
  }
  return super.filteredScorer(context, weight, docIdSet);
}
private FiltersFunctionFactorScorer functionScorer(LeafReaderContext context) throws IOException {
  Scorer subQueryScorer = subQueryWeight.scorer(context);
  if (subQueryScorer == null) {
    return null;
  }
  final LeafScoreFunction[] functions = new LeafScoreFunction[filterFunctions.length];
  final Bits[] docSets = new Bits[filterFunctions.length];
  for (int i = 0; i < filterFunctions.length; i++) {
    FilterFunction filterFunction = filterFunctions[i];
    functions[i] = filterFunction.function.getLeafScoreFunction(context);
    Scorer filterScorer = filterWeights[i].scorer(context);
    docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorer);
  }
  return new FiltersFunctionFactorScorer(
      this,
      subQueryScorer,
      scoreMode,
      filterFunctions,
      maxBoost,
      functions,
      docSets,
      combineFunction,
      needsScores);
}
MatchNoneScorer(IndexReader reader, Similarity similarity, Weight w, byte[] norms)
    throws IOException {
  super(similarity, w);
  this.termDocs = reader.termDocs(null);
  score = w.getValue();
  this.norms = norms;
}
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity similarity, byte[] norms)
    throws IOException {
  super(spans, weight, similarity, norms);
  // super(spans, weight, similarity, null);
  positions = spans.getPositions();
  value = weight.getValue();
}
// NOTE: acceptDocs applies (and is checked) only in the
// parent document space
@Override
public Scorer scorer(LeafReaderContext readerContext) throws IOException {
  final Scorer childScorer = childWeight.scorer(readerContext);
  if (childScorer == null) {
    // No matches
    return null;
  }
  final int firstChildDoc = childScorer.iterator().nextDoc();
  if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
    // No matches
    return null;
  }
  // NOTE: this does not take accept docs into account, the responsibility
  // to not match deleted docs is on the scorer
  final BitSet parents = parentsFilter.getBitSet(readerContext);
  if (parents == null) {
    // No matches
    return null;
  }
  return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
}
/**
 * Lower-level suggest API. Collects completion hits through <code>collector</code> for
 * <code>query</code>.
 *
 * <p>{@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)} is called
 * for every matching completion hit.
 */
public void suggest(CompletionQuery query, TopSuggestDocsCollector collector) throws IOException {
  // TODO use IndexSearcher.rewrite instead
  // have to implement equals() and hashCode() in CompletionQuery and co
  query = (CompletionQuery) query.rewrite(getIndexReader());
  Weight weight = query.createWeight(this, collector.needsScores());
  for (LeafReaderContext context : getIndexReader().leaves()) {
    BulkScorer scorer = weight.bulkScorer(context);
    if (scorer != null) {
      try {
        scorer.score(collector.getLeafCollector(context), context.reader().getLiveDocs());
      } catch (CollectionTerminatedException e) {
        // collection was terminated prematurely
        // continue with the following leaf
      }
    }
  }
}
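// Hedged usage sketch (not from the source) for the suggest() method above: the field name
// "suggest_field", the analyzer, the prefix "app" and the directory are assumptions, and the
// field is presumed to have been indexed as a completion (suggest) field.
SuggestIndexSearcher suggestSearcher = new SuggestIndexSearcher(DirectoryReader.open(directory));
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(5);
suggestSearcher.suggest(query, collector);
for (TopSuggestDocs.SuggestScoreDoc hit : collector.get().scoreLookupDocs()) {
  System.out.println(hit.key + " -> " + hit.score);
}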
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
    throws IOException {
  Scorer subQueryScorer = qWeight.scorer(reader, true, false);
  if (subQueryScorer == null) {
    return null;
  }
  return new BoostedQuery.CustomScorer(
      getSimilarity(searcher), searcher, reader, this, subQueryScorer, boostVal);
}
/* (non-Javadoc) @see org.apache.lucene.search.Weight#normalize(float) */
@Override
public void normalize(float norm) {
  norm *= getBoost(); // incorporate boost
  subQueryWeight.normalize(norm);
  for (int i = 0; i < valSrcWeights.length; i++) {
    if (qStrict) {
      valSrcWeights[i].normalize(1); // do not normalize the ValueSource part
    } else {
      valSrcWeights[i].normalize(norm);
    }
  }
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  Explanation expl = subQueryWeight.explain(context, doc);
  if (!expl.isMatch()) {
    return expl;
  }
  // First: Gather explanations for all filters
  List<Explanation> filterExplanations = new ArrayList<>();
  for (int i = 0; i < filterFunctions.length; ++i) {
    Bits docSet =
        Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterWeights[i].scorer(context));
    if (docSet.get(doc)) {
      FilterFunction filterFunction = filterFunctions[i];
      Explanation functionExplanation =
          filterFunction.function.getLeafScoreFunction(context).explainScore(doc, expl);
      double factor = functionExplanation.getValue();
      float sc = CombineFunction.toFloat(factor);
      Explanation filterExplanation =
          Explanation.match(
              sc,
              "function score, product of:",
              Explanation.match(1.0f, "match filter: " + filterFunction.filter.toString()),
              functionExplanation);
      filterExplanations.add(filterExplanation);
    }
  }
  if (filterExplanations.size() > 0) {
    FiltersFunctionFactorScorer scorer = functionScorer(context);
    int actualDoc = scorer.iterator().advance(doc);
    assert (actualDoc == doc);
    double score = scorer.computeScore(doc, expl.getValue());
    Explanation factorExplanation =
        Explanation.match(
            CombineFunction.toFloat(score),
            "function score, score mode [" + scoreMode.toString().toLowerCase(Locale.ROOT) + "]",
            filterExplanations);
    expl = combineFunction.explain(expl, factorExplanation, maxBoost);
  }
  if (minScore != null && minScore > expl.getValue()) {
    expl =
        Explanation.noMatch(
            "Score value is too low, expected at least "
                + minScore
                + " but got "
                + expl.getValue(),
            expl);
  }
  return expl;
}
protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms)
    throws IOException {
  super(similarity, weight);
  this.spans = spans;
  this.norms = norms;
  this.value = weight.getValue();
  if (this.spans.next()) {
    doc = -1;
  } else {
    doc = NO_MORE_DOCS;
    more = false;
  }
}
@Override
public Scorer scorer(
    final AtomicReaderContext context,
    final boolean scoreDocsInOrder,
    final boolean topScorer,
    final Bits acceptDocs)
    throws IOException {
  final NodeScorer scorer =
      (NodeScorer) weight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
  if (scorer == null) {
    return null;
  }
  return new AncestorFilterScorer(scorer, ancestorLevel);
}
/* (non-Javadoc) @see org.apache.lucene.search.Weight#sumOfSquaredWeights() */
@Override
public float sumOfSquaredWeights() throws IOException {
  float sum = subQueryWeight.sumOfSquaredWeights();
  for (int i = 0; i < valSrcWeights.length; i++) {
    if (qStrict) {
      // do not include the ValueSource part in the query normalization
      valSrcWeights[i].sumOfSquaredWeights();
    } else {
      sum += valSrcWeights[i].sumOfSquaredWeights();
    }
  }
  sum *= getBoost() * getBoost(); // boost each sub-weight
  return sum;
}
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
  Explanation mainExplain = mainWeight.explain(context, doc);
  return new QueryRescorer(reRankQuery) {
    @Override
    protected float combine(
        float firstPassScore, boolean secondPassMatches, float secondPassScore) {
      float score = firstPassScore;
      if (secondPassMatches) {
        score += reRankWeight * secondPassScore;
      }
      return score;
    }
  }.explain(searcher, mainExplain, context.docBase + doc);
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub)
    throws IOException {
  // no need to provide deleted docs to the filter
  final Bits bits =
      DocIdSets.asSequentialAccessBits(ctx.reader().maxDoc(), filter.scorer(ctx, null));
  return new LeafBucketCollectorBase(sub, null) {
    @Override
    public void collect(int doc, long bucket) throws IOException {
      if (bits.get(doc)) {
        collectBucket(sub, doc, bucket);
      }
    }
  };
}
private Map<String, Query> getSingleTermQueries(
    String termQuery, TokenType tokenType, boolean collapse) throws IOException {
  Map<String, Query> queriesMap = new HashMap<String, Query>();
  if (termQuery.contains(WILDCARD_ASTERISK) || termQuery.contains(WILDCARD_QUESTION)) {
    // contains a wildcard
    Term term = getTerm(termQuery, tokenType);
    Query query = getWildCardQuery(term);
    if (collapse) {
      // treat all wildcard variants as a single term
      queriesMap.put(termQuery, query);
    } else {
      // separate each wildcard term into its own query
      Set<Term> terms = new HashSet<Term>();
      Weight weight = query.createWeight(indexSearcher, false);
      weight.extractTerms(terms);
      for (Term t : terms) {
        // we don't need to analyze term here since it's already from the index
        queriesMap.put(t.text(), getTermQuery(t));
      }
    }
  } else {
    // regular term (we hope)
    Term term = getAnalyzedTerm(tokenType, termQuery); // analyze it first
    queriesMap.put(termQuery, getTermQuery(term));
  }
  return queriesMap;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
  if (values == null) {
    return null;
  }
  Scorer approximationScorer = approximationWeight.scorer(context);
  if (approximationScorer == null) {
    return null;
  } else if (globalOrds != null) {
    return new OrdinalMapScorer(
        this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
  } else {
    return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
  }
}
@Override
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
    throws IOException {
  // Pass true for "scoresDocsInOrder", because we
  // require in-order scoring, even if caller does not,
  // since we call advance on the valSrcScorers. Pass
  // false for "topScorer" because we will not invoke
  // score(Collector) on these scorers:
  Scorer subQueryScorer = subQueryWeight.scorer(reader, true, false);
  if (subQueryScorer == null) {
    return null;
  }
  Scorer[] valSrcScorers = new Scorer[valSrcWeights.length];
  for (int i = 0; i < valSrcScorers.length; i++) {
    valSrcScorers[i] = valSrcWeights[i].scorer(reader, true, topScorer);
  }
  return new CustomScorer(similarity, reader, this, subQueryScorer, valSrcScorers);
}
@Override
public Explanation explain(IndexReader reader, int doc) throws IOException {
  SolrIndexReader topReader = (SolrIndexReader) reader;
  SolrIndexReader[] subReaders = topReader.getLeafReaders();
  int[] offsets = topReader.getLeafOffsets();
  int readerPos = SolrIndexReader.readerIndex(doc, offsets);
  int readerBase = offsets[readerPos];

  Explanation subQueryExpl = qWeight.explain(reader, doc);
  if (!subQueryExpl.isMatch()) {
    return subQueryExpl;
  }

  DocValues vals = boostVal.getValues(context, subReaders[readerPos]);
  float sc = subQueryExpl.getValue() * vals.floatVal(doc - readerBase);
  Explanation res =
      new ComplexExplanation(true, sc, BoostedQuery.this.toString() + ", product of:");
  res.addDetail(subQueryExpl);
  res.addDetail(vals.explain(doc - readerBase));
  return res;
}
@Override
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
    throws IOException {
  for (LeafReaderContext ctx : leaves) { // search each subreader
    // we force the use of Scorer (not BulkScorer) to make sure
    // that the scorer passed to LeafCollector.setScorer supports
    // Scorer.getChildren
    Scorer scorer = weight.scorer(ctx);
    if (scorer != null) {
      final LeafCollector leafCollector = collector.getLeafCollector(ctx);
      leafCollector.setScorer(scorer);
      final Bits liveDocs = ctx.reader().getLiveDocs();
      for (int doc = scorer.nextDoc();
          doc != DocIdSetIterator.NO_MORE_DOCS;
          doc = scorer.nextDoc()) {
        if (liveDocs == null || liveDocs.get(doc)) {
          leafCollector.collect(doc);
        }
      }
    }
  }
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
  SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
  if (values == null) {
    return null;
  }
  Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
  if (approximationScorer == null) {
    return null;
  }
  if (globalOrds != null) {
    return new OrdinalMapScorer(
        this,
        queryNorm,
        foundOrds,
        values,
        approximationScorer,
        globalOrds.getGlobalOrds(context.ord));
  } else {
    return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
  }
}
private Explanation doExplain(IndexReader reader, int doc) throws IOException {
  Explanation subQueryExpl = subQueryWeight.explain(reader, doc);
  if (!subQueryExpl.isMatch()) {
    return subQueryExpl;
  }
  // match
  Explanation[] valSrcExpls = new Explanation[valSrcWeights.length];
  for (int i = 0; i < valSrcWeights.length; i++) {
    valSrcExpls[i] = valSrcWeights[i].explain(reader, doc);
  }
  Explanation customExp =
      CustomScoreQuery.this
          .getCustomScoreProvider(reader)
          .customExplain(doc, subQueryExpl, valSrcExpls);
  float sc = getValue() * customExp.getValue();
  Explanation res =
      new ComplexExplanation(true, sc, CustomScoreQuery.this.toString() + ", product of:");
  res.addDetail(customExp);
  // actually using the q boost as q weight (== weight value)
  res.addDetail(new Explanation(getValue(), "queryBoost"));
  return res;
}
@Override
public float getValueForNormalization() throws IOException {
  return subQueryWeight.getValueForNormalization();
}
@Override
public float sumOfSquaredWeights() throws IOException {
  float sum = qWeight.sumOfSquaredWeights();
  sum *= getBoost() * getBoost();
  return sum;
}
@Override
public float getValueForNormalization() throws IOException {
  float sum = parentWeight.getValueForNormalization();
  sum *= getBoost() * getBoost();
  return sum;
}
@Override
public void extractTerms(Set<Term> terms) {
  subQueryWeight.extractTerms(terms);
}
@Override
public void normalize(float norm, float boost) {
  subQueryWeight.normalize(norm, boost);
}
@Override
public void normalize(float norm) {
  norm *= getBoost();
  qWeight.normalize(norm);
}