Example #1
  @Test
  public void testRandomQueries() throws Exception {
    String[] vals = {"w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz"};

    int tot = 0;

    BooleanQuery q1 = null;
    try {

      // increase number of iterations for more complete testing
      int num = atLeast(10);
      for (int i = 0; i < num; i++) {
        int level = random.nextInt(3);
        q1 =
            randBoolQuery(
                new Random(random.nextLong()), random.nextBoolean(), level, field, vals, null);

        // Can't sort by relevance since floating point numbers may not quite
        // match up.
        Sort sort = Sort.INDEXORDER;

        QueryUtils.check(random, q1, searcher);
        final Similarity oldSim = searcher.getSimilarity();
        try {
          searcher.setSimilarity(new FunkySimilarity());
          QueryUtils.check(random, q1, searcher);
        } finally {
          searcher.setSimilarity(oldSim);
        }

        TopFieldCollector collector = TopFieldCollector.create(sort, 1000, false, true, true, true);

        searcher.search(q1, null, collector);
        ScoreDoc[] hits1 = collector.topDocs().scoreDocs;

        collector = TopFieldCollector.create(sort, 1000, false, true, true, false);

        searcher.search(q1, null, collector);
        ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
        tot += hits2.length;
        CheckHits.checkEqual(q1, hits1, hits2);

        BooleanQuery q3 = new BooleanQuery();
        q3.add(q1, BooleanClause.Occur.SHOULD);
        q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
        TopDocs hits4 = bigSearcher.search(q3, 1);
        assertEquals(mulFactor * collector.totalHits + NUM_EXTRA_DOCS / 2, hits4.totalHits);
      }

    } catch (Exception e) {
      // For easier debugging
      System.out.println("failed query: " + q1);
      throw e;
    }

    // System.out.println("Total hits:"+tot);
  }
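For context, the six-flag overload used above is the Lucene 4.x signature TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore, docsScoredInOrder). Below is a minimal, self-contained sketch of the same pattern against a small in-memory index; the class name TopFieldCollectorSketch and the sample documents are mine, and it assumes Lucene 4.x (here Version.LUCENE_47):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class TopFieldCollectorSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(
        dir, new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47)))) {
      for (String v : new String[] {"w1", "w2", "w1"}) {
        Document doc = new Document();
        doc.add(new StringField("field", v, Field.Store.YES));
        w.addDocument(doc);
      }
    }
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    // Same flag order as the test above: fillFields=false, trackDocScores=true,
    // trackMaxScore=true, docsScoredInOrder=true.
    TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, 10, false, true, true, true);
    searcher.search(new TermQuery(new Term("field", "w1")), collector);
    for (ScoreDoc sd : collector.topDocs().scoreDocs) {
      System.out.println("doc=" + sd.doc + " score=" + sd.score);
    }
  }
}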
Example #2
 /** Search, sorting by {@link Sort}, and computing drill down and sideways counts. */
 public DrillSidewaysResult search(
     DrillDownQuery query,
     Filter filter,
     FieldDoc after,
     int topN,
     Sort sort,
     boolean doDocScores,
     boolean doMaxScore)
     throws IOException {
   if (filter != null) {
     query = new DrillDownQuery(config, filter, query);
   }
   if (sort != null) {
     int limit = searcher.getIndexReader().maxDoc();
     if (limit == 0) {
       limit = 1; // the collector does not allow numHits = 0
     }
     topN = Math.min(topN, limit);
     final TopFieldCollector hitCollector =
         TopFieldCollector.create(sort, topN, after, true, doDocScores, doMaxScore, true);
     DrillSidewaysResult r = search(query, hitCollector);
     return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
   } else {
     return search(after, query, topN);
   }
 }
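The seven-argument overload used here threads a FieldDoc after through to the collector, which is what makes search-after paging work with a custom Sort. Here is a minimal sketch of that paging pattern in isolation, still assuming the Lucene 4.x API; the class SearchAfterSketch and both helper methods are hypothetical names of mine:

import java.io.IOException;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;

public class SearchAfterSketch {
  /** Fetch the page that follows {@code after}; pass null for the first page. */
  static TopDocs nextPage(IndexSearcher searcher, Query query, Sort sort,
                          FieldDoc after, int pageSize) throws IOException {
    // fillFields=true so every hit comes back as a FieldDoc that can seed the next call.
    TopFieldCollector collector =
        TopFieldCollector.create(sort, pageSize, after, true, false, false, true);
    searcher.search(query, collector);
    return collector.topDocs();
  }

  static TopDocs secondPage(IndexSearcher searcher, Query query, Sort sort) throws IOException {
    TopDocs page1 = nextPage(searcher, query, sort, null, 10);
    if (page1.scoreDocs.length == 0) {
      return page1; // no hits at all, so there is no second page
    }
    FieldDoc last = (FieldDoc) page1.scoreDocs[page1.scoreDocs.length - 1];
    return nextPage(searcher, query, sort, last, 10);
  }
}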
Example #3
  private OtpErlangObject match(LucenePageToken pageToken, int pageSize)
      throws IOException, ParseException {
    IndexReader reader = IndexReader.open(this.index);

    Query q = this.queryParser().parse(pageToken.getQueryString());

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs;

    Sort sort = new Sort(pageToken.getSortFields());
    TopFieldCollector collector =
        TopFieldCollector.create(
            sort, pageToken.getNextFirstHit() + pageSize - 1, true, true, true, true);
    searcher.search(q, collector);
    topDocs = collector.topDocs(pageToken.getNextFirstHit() - 1);

    ScoreDoc[] hits = topDocs.scoreDocs;
    // jlog.info("Sort: " + sort + "; topDocs: " + topDocs + "; hits: " + hits);
    int firstHit = 0;
    if (hits.length > 0) {
      firstHit = pageToken.getNextFirstHit();
    }

    List<Document> docs = new ArrayList<Document>(hits.length);
    for (ScoreDoc sd : hits) {
      docs.add(searcher.doc(sd.doc));
    }
    searcher.close();

    boolean nextPage =
        hits.length == pageSize && pageToken.incrementFirstHit(pageSize) <= topDocs.totalHits;

    OtpErlangList valuesAsList = this.translator.convert(docs, hits);

    // Metadata as a proplist

    OtpErlangObject[] metadata = new OtpErlangObject[nextPage ? 3 : 2];
    metadata[0] =
        new OtpErlangTuple(
            new OtpErlangObject[] {
              new OtpErlangAtom("total_hits"), new OtpErlangLong(topDocs.totalHits)
            });
    metadata[1] =
        new OtpErlangTuple(
            new OtpErlangObject[] {new OtpErlangAtom("first_hit"), new OtpErlangLong(firstHit)});
    if (nextPage) {
      metadata[2] =
          new OtpErlangTuple(
              new OtpErlangObject[] {
                new OtpErlangAtom("next_page"), new OtpErlangBinary(pageToken)
              });
    }

    OtpErlangList metadataAsList = new OtpErlangList(metadata);

    // Final result
    return new OtpErlangTuple(new OtpErlangObject[] {valuesAsList, metadataAsList});
  }
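This example pages by over-collecting and slicing: it collects every hit up to the end of the requested page, then calls topDocs(start) to skip the earlier ones (the - 1 converts the token's 1-based nextFirstHit to the collector's 0-based offset). The same pattern reduced to its core, again assuming the Lucene 4.x API; OffsetPagingSketch and page are names of mine:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldCollector;

public class OffsetPagingSketch {
  /** Return hits [start, start + pageSize) of the sorted result; start is 0-based. */
  static ScoreDoc[] page(IndexSearcher searcher, Query query, Sort sort,
                         int start, int pageSize) throws IOException {
    // Collect everything up to the end of the requested page...
    TopFieldCollector collector =
        TopFieldCollector.create(sort, start + pageSize, true, true, true, true);
    searcher.search(query, collector);
    // ...then ask the collector for just the slice that begins at `start`.
    return collector.topDocs(start, pageSize).scoreDocs;
  }
}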
Example #4
  private PossiblyLimitedTopDocs getTopDocs(Query query, Sort sort) throws IOException {
    final TopFieldCollector topCollector =
        TopFieldCollector.create(sort, maxHits, true, false, false, false);
    final Counter clock = Counter.newCounter(true);
    final int waitMillis = 1000;
    // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector?
    final TimeLimitingCollector collector =
        new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis / waitMillis);
    collector.setBaseline(0);
    final Thread counterThread =
        new Thread() {
          @Override
          public void run() {
            final long startTime = System.currentTimeMillis();
            while (true) {
              final long runTimeMillis = System.currentTimeMillis() - startTime;
              if (runTimeMillis > maxSearchTimeMillis) {
                // make sure there's no lingering thread for too long
                return;
              }
              clock.addAndGet(1);
              try {
                Thread.sleep(waitMillis);
              } catch (InterruptedException e) {
                throw new RuntimeException(e);
              }
            }
          }
        };
    counterThread.setName("LuceneSearchTimeoutThread");
    counterThread.start();

    boolean timeLimitActivated = false;
    try {
      indexSearcher.search(query, collector);
    } catch (TimeLimitingCollector.TimeExceededException e) {
      timeLimitActivated = true;
    }
    return new PossiblyLimitedTopDocs(topCollector.topDocs(), timeLimitActivated);
  }
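The hand-rolled thread above advances clock by one tick per waitMillis sleep, so maxSearchTimeMillis / waitMillis is the tick budget handed to the TimeLimitingCollector. Lucene also ships a shared daemon TimerThread for exactly this job; here is a sketch of the same timeout using it, assuming the Lucene 4.x API (TimeLimitedSearchSketch is a name of mine, and with the global counter the budget is expressed in milliseconds):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;

public class TimeLimitedSearchSketch {
  /** Search with a wall-clock budget; returns whatever was collected before the deadline. */
  static TopDocs search(IndexSearcher searcher, Query query, Sort sort,
                        int maxHits, long timeoutMillis) throws IOException {
    TopFieldCollector top = TopFieldCollector.create(sort, maxHits, true, false, false, false);
    // The global counter is advanced by a shared daemon TimerThread, so the
    // tick budget is expressed directly in milliseconds.
    TimeLimitingCollector limited =
        new TimeLimitingCollector(top, TimeLimitingCollector.getGlobalCounter(), timeoutMillis);
    try {
      searcher.search(query, limited);
    } catch (TimeLimitingCollector.TimeExceededException e) {
      // Deadline hit: fall through and return the partial results collected so far.
    }
    return top.topDocs();
  }
}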
Example #5
  /**
   * Retrieve suggestions, specifying whether all terms must match ({@code allTermsRequired}) and
   * whether the hits should be highlighted ({@code doHighlight}).
   */
  public List<LookupResult> lookup(
      CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {

    if (searcherMgr == null) {
      throw new IllegalStateException("suggester was not built");
    }

    final BooleanClause.Occur occur;
    if (allTermsRequired) {
      occur = BooleanClause.Occur.MUST;
    } else {
      occur = BooleanClause.Occur.SHOULD;
    }

    BooleanQuery query;
    Set<String> matchedTokens = new HashSet<>();
    String prefixToken = null;

    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
      // long t0 = System.currentTimeMillis();
      ts.reset();
      final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      String lastToken = null;
      query = new BooleanQuery();
      int maxEndOffset = -1;
      while (ts.incrementToken()) {
        if (lastToken != null) {
          matchedTokens.add(lastToken);
          query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
        }
        lastToken = termAtt.toString();
        if (lastToken != null) {
          maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
        }
      }
      ts.end();

      if (lastToken != null) {
        Query lastQuery;
        if (maxEndOffset == offsetAtt.endOffset()) {
          // Use PrefixQuery (or the ngram equivalent) when
          // there were no trailing discarded chars in the
          // string (e.g. whitespace), so that if query does
          // not end with a space we show prefix matches for
          // that token:
          lastQuery = getLastTokenQuery(lastToken);
          prefixToken = lastToken;
        } else {
          // Use TermQuery for an exact match if there were
          // trailing discarded chars (e.g. whitespace), so
          // that if query ends with a space we only show
          // exact matches for that term:
          matchedTokens.add(lastToken);
          lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
        }
        if (lastQuery != null) {
          query.add(lastQuery, occur);
        }
      }
    }

    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:

    // System.out.println("INFIX query=" + query);

    Query finalQuery = finishQuery(query, allTermsRequired);

    // System.out.println("finalQuery=" + query);

    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);

    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    IndexSearcher searcher = searcherMgr.acquire();
    List<LookupResult> results = null;
    try {
      // System.out.println("got searcher=" + searcher);
      searcher.search(finalQuery, c2);

      TopFieldDocs hits = (TopFieldDocs) c.topDocs();

      // Slower way if postings are not pre-sorted by weight:
      // hits = searcher.search(query, null, num, SORT);
      results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
      searcherMgr.release(searcher);
    }

    // System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
    // System.out.println(results);

    return results;
  }
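The weight-descending SORT plus EarlyTerminatingSortingCollector pairing is only safe because the suggester wrote its segments pre-sorted by that same SORT; the wrapper can then stop each segment after num collected docs instead of scanning it fully. Here is the wrapping pattern in isolation, assuming a Lucene 4.x index whose segments are already sorted by sortOrder (EarlyTerminationSketch and topNPreSorted are names of mine):

import java.io.IOException;
import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;

public class EarlyTerminationSketch {
  /** Collect the top {@code num} hits, terminating each segment early. Only
   *  correct when the index segments are already sorted by {@code sortOrder}. */
  static TopDocs topNPreSorted(IndexSearcher searcher, Query query,
                               Sort sortOrder, int num) throws IOException {
    TopFieldCollector top = TopFieldCollector.create(sortOrder, num, true, false, false, false);
    searcher.search(query, new EarlyTerminatingSortingCollector(top, sortOrder, num));
    return top.topDocs();
  }
}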