/**
 * Fuzz test: builds random boolean queries and cross-checks results between a
 * score-tracking and a non-score-tracking {@link TopFieldCollector}, plus a
 * hit-count sanity check against the larger {@code bigSearcher} index.
 */
@Test public void testRandomQueries() throws Exception {
  String[] vals = {"w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz"};
  int tot = 0;
  // Kept outside the loop so the failing query can be printed in the catch below.
  BooleanQuery q1 = null;
  try {
    // increase number of iterations for more complete testing
    int num = atLeast(10);
    for (int i = 0; i < num; i++) {
      int level = random.nextInt(3);
      // Seed a fresh Random from the test's RNG so each query is reproducible
      // from the master seed.
      q1 = randBoolQuery(
          new Random(random.nextLong()), random.nextBoolean(), level, field, vals, null);

      // Can't sort by relevance since floating point numbers may not quite
      // match up.
      Sort sort = Sort.INDEXORDER;

      QueryUtils.check(random, q1, searcher);

      // Re-check under a deliberately odd Similarity; restore the old one afterwards.
      final Similarity oldSim = searcher.getSimilarity();
      try {
        searcher.setSimilarity(new FunkySimilarity());
        QueryUtils.check(random, q1, searcher);
      } finally {
        searcher.setSimilarity(oldSim);
      }

      // Same query with and without score tracking must produce identical hits.
      TopFieldCollector collector = TopFieldCollector.create(sort, 1000, false, true, true, true);
      searcher.search(q1, null, collector);
      ScoreDoc[] hits1 = collector.topDocs().scoreDocs;

      collector = TopFieldCollector.create(sort, 1000, false, true, true, false);
      searcher.search(q1, null, collector);
      ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
      tot += hits2.length;
      CheckHits.checkEqual(q1, hits1, hits2);

      // Wrap q1 in a SHOULD clause alongside a prefix query and verify the
      // expected total on the multiplied big index.
      BooleanQuery q3 = new BooleanQuery();
      q3.add(q1, BooleanClause.Occur.SHOULD);
      q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
      TopDocs hits4 = bigSearcher.search(q3, 1);
      assertEquals(mulFactor * collector.totalHits + NUM_EXTRA_DOCS / 2, hits4.totalHits);
    }
  } catch (Exception e) {
    // For easier debugging
    System.out.println("failed query: " + q1);
    throw e;
  }
  // System.out.println("Total hits:"+tot);
}
/** Search, sorting by {@link Sort}, and computing drill down and sideways counts. */ public DrillSidewaysResult search( DrillDownQuery query, Filter filter, FieldDoc after, int topN, Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException { if (filter != null) { query = new DrillDownQuery(config, filter, query); } if (sort != null) { int limit = searcher.getIndexReader().maxDoc(); if (limit == 0) { limit = 1; // the collector does not alow numHits = 0 } topN = Math.min(topN, limit); final TopFieldCollector hitCollector = TopFieldCollector.create(sort, topN, after, true, doDocScores, doMaxScore, true); DrillSidewaysResult r = search(query, hitCollector); return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); } else { return search(after, query, topN); } }
private OtpErlangObject match(LucenePageToken pageToken, int pageSize) throws IOException, ParseException { IndexReader reader = IndexReader.open(this.index); Query q = this.queryParser().parse(pageToken.getQueryString()); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs; Sort sort = new Sort(pageToken.getSortFields()); TopFieldCollector collector = TopFieldCollector.create( sort, pageToken.getNextFirstHit() + pageSize - 1, true, true, true, true); searcher.search(q, collector); topDocs = collector.topDocs(pageToken.getNextFirstHit() - 1); ScoreDoc[] hits = topDocs.scoreDocs; // jlog.info("Sort: " + sort + "; topDocs: " + topDocs + "; hits: + " + // hits); int firstHit = 0; if (hits.length > 0) { firstHit = pageToken.getNextFirstHit(); } List<Document> docs = new ArrayList<Document>(hits.length); for (ScoreDoc sd : hits) { docs.add(searcher.doc(sd.doc)); } searcher.close(); boolean nextPage = hits.length == pageSize && pageToken.incrementFirstHit(pageSize) <= topDocs.totalHits; OtpErlangList valuesAsList = this.translator.convert(docs, hits); // Metadata as a proplist OtpErlangObject[] metadata = new OtpErlangObject[nextPage ? 3 : 2]; metadata[0] = new OtpErlangTuple( new OtpErlangObject[] { new OtpErlangAtom("total_hits"), new OtpErlangLong(topDocs.totalHits) }); metadata[1] = new OtpErlangTuple( new OtpErlangObject[] {new OtpErlangAtom("first_hit"), new OtpErlangLong(firstHit)}); if (nextPage) { metadata[2] = new OtpErlangTuple( new OtpErlangObject[] { new OtpErlangAtom("next_page"), new OtpErlangBinary(pageToken) }); } OtpErlangList metadataAsList = new OtpErlangList(metadata); // Final result return new OtpErlangTuple(new OtpErlangObject[] {valuesAsList, metadataAsList}); }
private PossiblyLimitedTopDocs getTopDocs(Query query, Sort sort) throws IOException { final TopFieldCollector topCollector = TopFieldCollector.create(sort, maxHits, true, false, false, false); final Counter clock = Counter.newCounter(true); final int waitMillis = 1000; // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector? final TimeLimitingCollector collector = new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis / waitMillis); collector.setBaseline(0); final Thread counterThread = new Thread() { @Override public void run() { final long startTime = System.currentTimeMillis(); while (true) { final long runTimeMillis = System.currentTimeMillis() - startTime; if (runTimeMillis > maxSearchTimeMillis) { // make sure there's no lingering thread for too long return; } clock.addAndGet(1); try { Thread.sleep(waitMillis); } catch (InterruptedException e) { throw new RuntimeException(e); } } } }; counterThread.setName("LuceneSearchTimeoutThread"); counterThread.start(); boolean timeLimitActivated = false; try { indexSearcher.search(query, collector); } catch (TimeLimitingCollector.TimeExceededException e) { timeLimitActivated = true; } return new PossiblyLimitedTopDocs(topCollector.topDocs(), timeLimitActivated); }
/**
 * Retrieve suggestions, specifying whether all terms must match ({@code allTermsRequired}) and
 * whether the hits should be highlighted ({@code doHighlight}).
 *
 * @param key the (possibly partial) text typed so far
 * @param num maximum number of suggestions to return
 * @param allTermsRequired if true, every analyzed token must match (MUST clauses);
 *     otherwise any token may match (SHOULD clauses)
 * @param doHighlight whether results should be highlighted against the matched tokens
 * @throws IllegalStateException if the suggester has not been built yet
 */
public List<LookupResult> lookup(
    CharSequence key, int num, boolean allTermsRequired, boolean doHighlight)
    throws IOException {

  if (searcherMgr == null) {
    throw new IllegalStateException("suggester was not built");
  }

  final BooleanClause.Occur occur;
  if (allTermsRequired) {
    occur = BooleanClause.Occur.MUST;
  } else {
    occur = BooleanClause.Occur.SHOULD;
  }

  BooleanQuery query;
  Set<String> matchedTokens = new HashSet<>();
  String prefixToken = null;

  // Analyze the key into tokens; every token except the last becomes a TermQuery
  // clause, and the last token is handled specially below (prefix vs. exact).
  try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
    // long t0 = System.currentTimeMillis();
    ts.reset();
    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    String lastToken = null;
    query = new BooleanQuery();
    int maxEndOffset = -1;
    matchedTokens = new HashSet<>();
    while (ts.incrementToken()) {
      if (lastToken != null) {
        // The previous token is now known not to be the last: add it as exact.
        matchedTokens.add(lastToken);
        query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
      }
      lastToken = termAtt.toString();
      if (lastToken != null) {
        maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
      }
    }
    ts.end();

    if (lastToken != null) {
      Query lastQuery;
      if (maxEndOffset == offsetAtt.endOffset()) {
        // Use PrefixQuery (or the ngram equivalent) when
        // there was no trailing discarded chars in the
        // string (e.g. whitespace), so that if query does
        // not end with a space we show prefix matches for
        // that token:
        lastQuery = getLastTokenQuery(lastToken);
        prefixToken = lastToken;
      } else {
        // Use TermQuery for an exact match if there were
        // trailing discarded chars (e.g. whitespace), so
        // that if query ends with a space we only show
        // exact matches for that term:
        matchedTokens.add(lastToken);
        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
      }
      if (lastQuery != null) {
        query.add(lastQuery, occur);
      }
    }
  }

  // TODO: we could allow blended sort here, combining
  // weight w/ score.  Now we ignore score and sort only
  // by weight:

  // System.out.println("INFIX query=" + query);

  Query finalQuery = finishQuery(query, allTermsRequired);

  // System.out.println("finalQuery=" + query);

  // Sort by weight, descending:
  TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);

  // We sorted postings by weight during indexing, so we
  // only retrieve the first num hits now:
  Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
  IndexSearcher searcher = searcherMgr.acquire();
  List<LookupResult> results = null;
  try {
    // System.out.println("got searcher=" + searcher);
    searcher.search(finalQuery, c2);

    TopFieldDocs hits = (TopFieldDocs) c.topDocs();

    // Slower way if postings are not pre-sorted by weight:
    // hits = searcher.search(query, null, num, SORT);
    results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
  } finally {
    // Always release the searcher back to the manager, even on failure.
    searcherMgr.release(searcher);
  }

  // System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
  // System.out.println(results);

  return results;
}