/** Search, sorting by {@link Sort}, and computing drill down and sideways counts. */
public DrillSidewaysResult search(
    DrillDownQuery query,
    Filter filter,
    FieldDoc after,
    int topN,
    Sort sort,
    boolean doDocScores,
    boolean doMaxScore)
    throws IOException {
  if (filter != null) {
    query = new DrillDownQuery(config, filter, query);
  }
  if (sort != null) {
    int limit = searcher.getIndexReader().maxDoc();
    if (limit == 0) {
      limit = 1; // the collector does not allow numHits = 0
    }
    topN = Math.min(topN, limit);
    final TopFieldCollector hitCollector =
        TopFieldCollector.create(sort, topN, after, true, doDocScores, doMaxScore, true);
    DrillSidewaysResult r = search(query, hitCollector);
    return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
  } else {
    return search(after, query, topN);
  }
}
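// A minimal, hypothetical usage sketch for the sorted variant above. The
// DrillSideways instance `ds`, the FacetsConfig `config`, the "Publish Date"
// dimension, and the sortable numeric "timestamp" field are assumptions for
// illustration, not taken from the source.
DrillDownQuery ddq = new DrillDownQuery(config); // match-all base query
ddq.add("Publish Date", "2012");
Sort byTimestamp = new Sort(new SortField("timestamp", SortField.Type.LONG, /*reverse=*/true));
DrillSidewaysResult result =
    ds.search(ddq, null, null, 10, byTimestamp, /*doDocScores=*/true, /*doMaxScore=*/false);
System.out.println("hits=" + result.hits.totalHits);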
private OtpErlangObject match(LucenePageToken pageToken, int pageSize)
    throws IOException, ParseException {
  IndexReader reader = IndexReader.open(this.index);
  Query q = this.queryParser().parse(pageToken.getQueryString());
  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs;
  Sort sort = new Sort(pageToken.getSortFields());
  // Collect enough hits to cover all pages up to and including the requested one.
  TopFieldCollector collector =
      TopFieldCollector.create(
          sort, pageToken.getNextFirstHit() + pageSize - 1, true, true, true, true);
  searcher.search(q, collector);
  // Skip the hits that belong to previous pages.
  topDocs = collector.topDocs(pageToken.getNextFirstHit() - 1);
  ScoreDoc[] hits = topDocs.scoreDocs;
  // jlog.info("Sort: " + sort + "; topDocs: " + topDocs + "; hits: + " + hits);
  int firstHit = 0;
  if (hits.length > 0) {
    firstHit = pageToken.getNextFirstHit();
  }
  List<Document> docs = new ArrayList<Document>(hits.length);
  for (ScoreDoc sd : hits) {
    docs.add(searcher.doc(sd.doc));
  }
  searcher.close();
  boolean nextPage =
      hits.length == pageSize && pageToken.incrementFirstHit(pageSize) <= topDocs.totalHits;
  OtpErlangList valuesAsList = this.translator.convert(docs, hits);
  // Metadata as a proplist
  OtpErlangObject[] metadata = new OtpErlangObject[nextPage ? 3 : 2];
  metadata[0] =
      new OtpErlangTuple(
          new OtpErlangObject[] {
            new OtpErlangAtom("total_hits"), new OtpErlangLong(topDocs.totalHits)
          });
  metadata[1] =
      new OtpErlangTuple(
          new OtpErlangObject[] {new OtpErlangAtom("first_hit"), new OtpErlangLong(firstHit)});
  if (nextPage) {
    metadata[2] =
        new OtpErlangTuple(
            new OtpErlangObject[] {
              new OtpErlangAtom("next_page"), new OtpErlangBinary(pageToken)
            });
  }
  OtpErlangList metadataAsList = new OtpErlangList(metadata);
  // Final result: {Values, Metadata}
  return new OtpErlangTuple(new OtpErlangObject[] {valuesAsList, metadataAsList});
}
@Test
public void testRandomQueries() throws Exception {
  String[] vals = {"w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz"};

  int tot = 0;

  BooleanQuery q1 = null;
  try {
    // increase number of iterations for more complete testing
    int num = atLeast(10);
    for (int i = 0; i < num; i++) {
      int level = random.nextInt(3);
      q1 =
          randBoolQuery(
              new Random(random.nextLong()), random.nextBoolean(), level, field, vals, null);

      // Can't sort by relevance since floating point numbers may not quite match up.
      Sort sort = Sort.INDEXORDER;

      QueryUtils.check(random, q1, searcher);

      final Similarity oldSim = searcher.getSimilarity();
      try {
        searcher.setSimilarity(new FunkySimilarity());
        QueryUtils.check(random, q1, searcher);
      } finally {
        searcher.setSimilarity(oldSim);
      }

      TopFieldCollector collector = TopFieldCollector.create(sort, 1000, false, true, true, true);
      searcher.search(q1, null, collector);
      ScoreDoc[] hits1 = collector.topDocs().scoreDocs;

      collector = TopFieldCollector.create(sort, 1000, false, true, true, false);
      searcher.search(q1, null, collector);
      ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
      tot += hits2.length;
      CheckHits.checkEqual(q1, hits1, hits2);

      BooleanQuery q3 = new BooleanQuery();
      q3.add(q1, BooleanClause.Occur.SHOULD);
      q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
      TopDocs hits4 = bigSearcher.search(q3, 1);
      assertEquals(mulFactor * collector.totalHits + NUM_EXTRA_DOCS / 2, hits4.totalHits);
    }
  } catch (Exception e) {
    // For easier debugging
    System.out.println("failed query: " + q1);
    throw e;
  }

  // System.out.println("Total hits:" + tot);
}
/**
 * Creates and submits a query to the Lucene engine.
 *
 * @param query The base query, using the query engine supported by Lucene.
 * @param sort A Lucene sort object, can contain one or more sort criteria. If <tt>null</tt>,
 *     sort by hit score.
 * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be
 *     <tt>null</tt> to search all virtual wikis.
 * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> or
 *     empty to search all languages.
 * @param indexes List of Lucene indexes (readers) to search.
 * @param context The context of the request.
 * @return The list of search results.
 * @throws IOException If the Lucene searchers encounter a problem reading the indexes.
 * @throws ParseException If the query is not valid.
 */
private SearchResults search(
    String query,
    Sort sort,
    String virtualWikiNames,
    String languages,
    IndexReader[] indexes,
    XWikiContext context)
    throws IOException, ParseException {
  IndexSearcher searcher = new IndexSearcher(new MultiReader(indexes, true));

  // Enhance the base query with wiki names and languages.
  Query q = buildQuery(query, virtualWikiNames, languages);

  // Perform the actual search
  TopDocsCollector<? extends ScoreDoc> results;
  if (sort != null) {
    results = TopFieldCollector.create(sort, 1000, true, true, false, false);
  } else {
    results = TopScoreDocCollector.create(1000, false);
  }
  searcher.search(q, results);
  LOGGER.debug("query [{}] returned {} hits", q, results.getTotalHits());

  // Transform the raw Lucene search results into XWiki-aware results
  return new SearchResults(
      results, searcher, new com.xpn.xwiki.api.XWiki(context.getWiki(), context), context);
}
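// A hypothetical caller of the private method above. The `readers` array, the
// XWikiContext `context`, and the sortable "date" field are assumptions for
// illustration: sort matches by date, newest first, searching English and French.
Sort byDate = new Sort(new SortField("date", SortField.STRING, /*reverse=*/true));
SearchResults results = search("title:lucene", byDate, null, "en,fr", readers, context);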
public ReRankCollector(
    int reRankDocs,
    int length,
    Query reRankQuery,
    double reRankWeight,
    SolrIndexSearcher.QueryCommand cmd,
    IndexSearcher searcher,
    Map<BytesRef, Integer> boostedPriority,
    boolean scale)
    throws IOException {
  super(null);
  this.reRankQuery = reRankQuery;
  this.reRankDocs = reRankDocs;
  this.length = length;
  this.boostedPriority = boostedPriority;
  this.scale = scale;
  Sort sort = cmd.getSort();
  // Collect enough hits to cover both the requested page and the re-rank window.
  if (sort == null) {
    this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), true);
  } else {
    sort = sort.rewrite(searcher);
    this.mainCollector =
        TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), false, true, true, true);
  }
  this.searcher = searcher;
  this.reRankWeight = reRankWeight;
}
@Override
public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
    throws IOException {
  final String field;
  final String term;
  if (isParentHit(hitContext.hit())) {
    field = ParentFieldMapper.NAME;
    term = Uid.createUid(hitContext.hit().type(), hitContext.hit().id());
  } else if (isChildHit(hitContext.hit())) {
    field = UidFieldMapper.NAME;
    SearchHitField parentField = hitContext.hit().field(ParentFieldMapper.NAME);
    if (parentField != null) {
      term = parentField.getValue();
    } else {
      SingleFieldsVisitor fieldsVisitor = new SingleFieldsVisitor(ParentFieldMapper.NAME);
      hitContext.reader().document(hitContext.docId(), fieldsVisitor);
      if (fieldsVisitor.fields().isEmpty()) {
        return Lucene.EMPTY_TOP_DOCS;
      }
      term = (String) fieldsVisitor.fields().get(ParentFieldMapper.NAME).get(0);
    }
  } else {
    return Lucene.EMPTY_TOP_DOCS;
  }

  // Only include docs that have the current hit as parent
  Filter filter = new TermFilter(new Term(field, term));
  // Only include docs that have this inner hits type
  Filter typeFilter = documentMapper.typeFilter();

  if (size() == 0) {
    TotalHitCountCollector collector = new TotalHitCountCollector();
    context
        .searcher()
        .search(
            new XFilteredQuery(query, new AndFilter(Arrays.asList(filter, typeFilter))),
            collector);
    return new TopDocs(collector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
  } else {
    int topN = from() + size();
    TopDocsCollector topDocsCollector;
    if (sort() != null) {
      topDocsCollector =
          TopFieldCollector.create(sort(), topN, true, trackScores(), trackScores(), false);
    } else {
      topDocsCollector = TopScoreDocCollector.create(topN, false);
    }
    context
        .searcher()
        .search(
            new XFilteredQuery(query, new AndFilter(Arrays.asList(filter, typeFilter))),
            topDocsCollector);
    return topDocsCollector.topDocs(from(), size());
  }
}
private PossiblyLimitedTopDocs getTopDocs(Query query, Sort sort) throws IOException {
  final TopFieldCollector topCollector =
      TopFieldCollector.create(sort, maxHits, true, false, false, false);
  final Counter clock = Counter.newCounter(true);
  final int waitMillis = 1000;
  // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector?
  final TimeLimitingCollector collector =
      new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis / waitMillis);
  collector.setBaseline(0);
  final Thread counterThread =
      new Thread() {
        @Override
        public void run() {
          final long startTime = System.currentTimeMillis();
          while (true) {
            final long runTimeMillis = System.currentTimeMillis() - startTime;
            if (runTimeMillis > maxSearchTimeMillis) {
              // make sure there's no lingering thread for too long
              return;
            }
            clock.addAndGet(1);
            try {
              Thread.sleep(waitMillis);
            } catch (InterruptedException e) {
              throw new RuntimeException(e);
            }
          }
        }
      };
  counterThread.setName("LuceneSearchTimeoutThread");
  counterThread.start();

  boolean timeLimitActivated = false;
  try {
    indexSearcher.search(query, collector);
  } catch (TimeLimitingCollector.TimeExceededException e) {
    timeLimitActivated = true;
  }
  return new PossiblyLimitedTopDocs(topCollector.topDocs(), timeLimitActivated);
}
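// Regarding the TODO above: Lucene ships a shared timer thread that can replace
// the hand-rolled counter thread. A minimal sketch, assuming the same `sort`,
// `maxHits`, `maxSearchTimeMillis`, `query`, and `indexSearcher` as in the
// method above; with the global counter, the time budget is given in milliseconds.
TopFieldCollector top = TopFieldCollector.create(sort, maxHits, true, false, false, false);
TimeLimitingCollector limited =
    new TimeLimitingCollector(top, TimeLimitingCollector.getGlobalCounter(), maxSearchTimeMillis);
try {
  indexSearcher.search(query, limited);
} catch (TimeLimitingCollector.TimeExceededException e) {
  // timed out; whatever was collected so far is still available via top.topDocs()
}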
@Override
public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
    throws IOException {
  Filter rawParentFilter;
  if (parentObjectMapper == null) {
    rawParentFilter = NonNestedDocsFilter.INSTANCE;
  } else {
    rawParentFilter = parentObjectMapper.nestedTypeFilter();
  }
  FixedBitSetFilter parentFilter =
      context.fixedBitSetFilterCache().getFixedBitSetFilter(rawParentFilter);
  Filter childFilter = context.filterCache().cache(childObjectMapper.nestedTypeFilter());
  Query q =
      new XFilteredQuery(query, new NestedChildrenFilter(parentFilter, childFilter, hitContext));

  if (size() == 0) {
    TotalHitCountCollector collector = new TotalHitCountCollector();
    context.searcher().search(q, collector);
    return new TopDocs(collector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
  } else {
    int topN = from() + size();
    TopDocsCollector topDocsCollector;
    if (sort() != null) {
      try {
        topDocsCollector =
            TopFieldCollector.create(sort(), topN, true, trackScores(), trackScores(), true);
      } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
      }
    } else {
      topDocsCollector = TopScoreDocCollector.create(topN, true);
    }
    context.searcher().search(q, topDocsCollector);
    return topDocsCollector.topDocs(from(), size());
  }
}
/**
 * Accumulates groups for the BlockJoinQuery specified by its slot.
 *
 * @param slot Search query's slot
 * @param offset Parent docs offset
 * @param maxDocsPerGroup Upper bound on the number of documents per group
 * @param withinGroupOffset Offset within each group of child docs
 * @param withinGroupSort Sort criteria within groups
 * @param fillSortFields Specifies whether to add sort fields or not
 * @return TopGroups for the query specified by slot
 * @throws IOException if there is a low-level I/O error
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private TopGroups<Integer> accumulateGroups(
    int slot,
    int offset,
    int maxDocsPerGroup,
    int withinGroupOffset,
    Sort withinGroupSort,
    boolean fillSortFields)
    throws IOException {
  final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
  final FakeScorer fakeScorer = new FakeScorer();

  int totalGroupedHitCount = 0;
  // System.out.println("slot=" + slot);

  for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
    final OneGroup og = sortedGroups[groupIDX];
    final int numChildDocs;
    if (slot == -1 || slot >= og.counts.length) {
      numChildDocs = 0;
    } else {
      numChildDocs = og.counts[slot];
    }

    // Number of documents in group should be bounded to prevent redundant memory allocation
    final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
    // System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs
    //     + " maxDocsPG=" + maxDocsPerGroup);

    // At this point we hold all docs within each group, unsorted; we now sort them:
    final TopDocsCollector<?> collector;
    if (withinGroupSort == null) {
      // System.out.println("sort by score");
      // Sort by score
      if (!trackScores) {
        throw new IllegalArgumentException(
            "cannot sort by relevance within group: trackScores=false");
      }
      collector = TopScoreDocCollector.create(numDocsInGroup, true);
    } else {
      // Sort by fields
      collector =
          TopFieldCollector.create(
              withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
    }

    collector.setScorer(fakeScorer);
    collector.setNextReader(og.readerContext);
    for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
      // System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
      final int doc = og.docs[slot][docIDX];
      fakeScorer.doc = doc;
      if (trackScores) {
        fakeScorer.score = og.scores[slot][docIDX];
      }
      collector.collect(doc);
    }
    totalGroupedHitCount += numChildDocs;

    final Object[] groupSortValues;
    if (fillSortFields) {
      groupSortValues = new Object[comparators.length];
      for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
        groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
      }
    } else {
      groupSortValues = null;
    }

    final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

    groups[groupIDX - offset] =
        new GroupDocs<>(
            og.score,
            topDocs.getMaxScore(),
            numChildDocs,
            topDocs.scoreDocs,
            og.doc,
            groupSortValues);
  }

  return new TopGroups<>(
      new TopGroups<>(
          sort.getSort(),
          withinGroupSort == null ? null : withinGroupSort.getSort(),
          0,
          totalGroupedHitCount,
          groups,
          maxScore),
      totalHitCount);
}
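// For context, a hedged sketch of how this private accumulator is typically
// reached from outside: Lucene's ToParentBlockJoinCollector exposes it through
// getTopGroups(...). The `searcher`, `parentQuery`, and `childJoinQuery`
// (a ToParentBlockJoinQuery) names are assumptions for illustration.
ToParentBlockJoinCollector c =
    new ToParentBlockJoinCollector(
        Sort.RELEVANCE, 10, /*trackScores=*/true, /*trackMaxScore=*/true);
searcher.search(parentQuery, c);
TopGroups<Integer> groups =
    c.getTopGroups(
        childJoinQuery, /*withinGroupSort=*/null, /*offset=*/0,
        /*maxDocsPerGroup=*/5, /*withinGroupOffset=*/0, /*fillSortFields=*/true);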
/**
 * Retrieve suggestions, specifying whether all terms must match ({@code allTermsRequired}) and
 * whether the hits should be highlighted ({@code doHighlight}).
 */
public List<LookupResult> lookup(
    CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {

  if (searcherMgr == null) {
    throw new IllegalStateException("suggester was not built");
  }

  final BooleanClause.Occur occur;
  if (allTermsRequired) {
    occur = BooleanClause.Occur.MUST;
  } else {
    occur = BooleanClause.Occur.SHOULD;
  }

  BooleanQuery query;
  Set<String> matchedTokens = new HashSet<>();
  String prefixToken = null;

  try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
    // long t0 = System.currentTimeMillis();
    ts.reset();
    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    String lastToken = null;
    query = new BooleanQuery();
    int maxEndOffset = -1;
    matchedTokens = new HashSet<>();
    while (ts.incrementToken()) {
      if (lastToken != null) {
        matchedTokens.add(lastToken);
        query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
      }
      lastToken = termAtt.toString();
      if (lastToken != null) {
        maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
      }
    }
    ts.end();

    if (lastToken != null) {
      Query lastQuery;
      if (maxEndOffset == offsetAtt.endOffset()) {
        // Use PrefixQuery (or the ngram equivalent) when there were no trailing
        // discarded chars in the string (e.g. whitespace), so that if the query
        // does not end with a space we show prefix matches for that token:
        lastQuery = getLastTokenQuery(lastToken);
        prefixToken = lastToken;
      } else {
        // Use TermQuery for an exact match if there were trailing discarded
        // chars (e.g. whitespace), so that if the query ends with a space we
        // only show exact matches for that term:
        matchedTokens.add(lastToken);
        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
      }

      if (lastQuery != null) {
        query.add(lastQuery, occur);
      }
    }
  }

  // TODO: we could allow blended sort here, combining
  // weight w/ score. Now we ignore score and sort only
  // by weight:
  // System.out.println("INFIX query=" + query);
  Query finalQuery = finishQuery(query, allTermsRequired);
  // System.out.println("finalQuery=" + query);

  // Sort by weight, descending:
  TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);

  // We sorted postings by weight during indexing, so we
  // only retrieve the first num hits now:
  Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
  IndexSearcher searcher = searcherMgr.acquire();
  List<LookupResult> results = null;
  try {
    // System.out.println("got searcher=" + searcher);
    searcher.search(finalQuery, c2);

    TopFieldDocs hits = (TopFieldDocs) c.topDocs();

    // Slower way if postings are not pre-sorted by weight:
    // hits = searcher.search(query, null, num, SORT);
    results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
  } finally {
    searcherMgr.release(searcher);
  }

  // System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
  // System.out.println(results);

  return results;
}
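// A hypothetical call to the lookup above; `suggester` (an infix suggester built
// beforehand) and the prefix "appl" are assumptions for illustration.
List<LookupResult> suggestions =
    suggester.lookup("appl", 5, /*allTermsRequired=*/true, /*doHighlight=*/true);
for (LookupResult r : suggestions) {
  // key is the suggestion text (highlighted when doHighlight=true),
  // value is the weight assigned at index time
  System.out.println(r.key + " => " + r.value);
}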