/** Search, sorting by {@link Sort}, and computing drill down and sideways counts. */
 public DrillSidewaysResult search(
     DrillDownQuery query,
     Filter filter,
     FieldDoc after,
     int topN,
     Sort sort,
     boolean doDocScores,
     boolean doMaxScore)
     throws IOException {
   if (filter != null) {
     query = new DrillDownQuery(config, filter, query);
   }
   if (sort != null) {
     int limit = searcher.getIndexReader().maxDoc();
     if (limit == 0) {
        limit = 1; // the collector does not allow numHits = 0
     }
     topN = Math.min(topN, limit);
     final TopFieldCollector hitCollector =
         TopFieldCollector.create(sort, topN, after, true, doDocScores, doMaxScore, true);
     DrillSidewaysResult r = search(query, hitCollector);
     return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
   } else {
     return search(after, query, topN);
   }
 }
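// Usage sketch (illustrative, not from the original snippet): a minimal call of
// the method above, assuming Lucene 4.x-era facet APIs and existing searcher /
// taxoReader / config objects; the field and dimension names are made up.
DrillDownQuery ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa");
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
Sort byTime = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
DrillSidewaysResult result =
    ds.search(ddq, null /* filter */, null /* after */, 10, byTime,
        false /* doDocScores */, false /* doMaxScore */);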
Example #2
  private OtpErlangObject match(LucenePageToken pageToken, int pageSize)
      throws IOException, ParseException {
    IndexReader reader = IndexReader.open(this.index);

    Query q = this.queryParser().parse(pageToken.getQueryString());

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs;

    Sort sort = new Sort(pageToken.getSortFields());
    TopFieldCollector collector =
        TopFieldCollector.create(
            sort, pageToken.getNextFirstHit() + pageSize - 1, true, true, true, true);
    searcher.search(q, collector);
    topDocs = collector.topDocs(pageToken.getNextFirstHit() - 1);

    ScoreDoc[] hits = topDocs.scoreDocs;
    // jlog.info("Sort: " + sort + "; topDocs: " + topDocs + "; hits: + " +
    // hits);
    int firstHit = 0;
    if (hits.length > 0) {
      firstHit = pageToken.getNextFirstHit();
    }

    List<Document> docs = new ArrayList<Document>(hits.length);
    for (ScoreDoc sd : hits) {
      docs.add(searcher.doc(sd.doc));
    }
    searcher.close();
    reader.close(); // the 3.x IndexSearcher(IndexReader) ctor does not close the reader it was given

    boolean nextPage =
        hits.length == pageSize && pageToken.incrementFirstHit(pageSize) <= topDocs.totalHits;

    OtpErlangList valuesAsList = this.translator.convert(docs, hits);

    // Metadata as a proplist

    OtpErlangObject[] metadata = new OtpErlangObject[nextPage ? 3 : 2];
    metadata[0] =
        new OtpErlangTuple(
            new OtpErlangObject[] {
              new OtpErlangAtom("total_hits"), new OtpErlangLong(topDocs.totalHits)
            });
    metadata[1] =
        new OtpErlangTuple(
            new OtpErlangObject[] {new OtpErlangAtom("first_hit"), new OtpErlangLong(firstHit)});
    if (nextPage) {
      metadata[2] =
          new OtpErlangTuple(
              new OtpErlangObject[] {
                new OtpErlangAtom("next_page"), new OtpErlangBinary(pageToken)
              });
    }

    OtpErlangList metadataAsList = new OtpErlangList(metadata);

    // Final result
    return new OtpErlangTuple(new OtpErlangObject[] {valuesAsList, metadataAsList});
  }
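// Pagination arithmetic for the method above (illustrative numbers): the
// collector must keep every hit from rank 1 through the end of the requested
// page, because TopFieldCollector only exposes a contiguous top-N.
int nextFirstHit = 21;                     // 1-based rank of the page's first hit
int pageSize = 10;
int numHits = nextFirstHit + pageSize - 1; // 30: the collector holds ranks 1..30
int start = nextFirstHit - 1;              // topDocs(start) is 0-based, hence 20
// collector.topDocs(start) then returns ranks 21..30, i.e. exactly this page.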
  @Test
  public void testRandomQueries() throws Exception {
    String[] vals = {"w1", "w2", "w3", "w4", "w5", "xx", "yy", "zzz"};

    int tot = 0;

    BooleanQuery q1 = null;
    try {

      // increase number of iterations for more complete testing
      int num = atLeast(10);
      for (int i = 0; i < num; i++) {
        int level = random.nextInt(3);
        q1 =
            randBoolQuery(
                new Random(random.nextLong()), random.nextBoolean(), level, field, vals, null);

        // Can't sort by relevance since floating point numbers may not quite
        // match up.
        Sort sort = Sort.INDEXORDER;

        QueryUtils.check(random, q1, searcher);
        final Similarity oldSim = searcher.getSimilarity();
        try {
          searcher.setSimilarity(new FunkySimilarity());
          QueryUtils.check(random, q1, searcher);
        } finally {
          searcher.setSimilarity(oldSim);
        }

        TopFieldCollector collector = TopFieldCollector.create(sort, 1000, false, true, true, true);

        searcher.search(q1, null, collector);
        ScoreDoc[] hits1 = collector.topDocs().scoreDocs;

        collector = TopFieldCollector.create(sort, 1000, false, true, true, false);

        searcher.search(q1, null, collector);
        ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
        tot += hits2.length;
        CheckHits.checkEqual(q1, hits1, hits2);

        BooleanQuery q3 = new BooleanQuery();
        q3.add(q1, BooleanClause.Occur.SHOULD);
        q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
        TopDocs hits4 = bigSearcher.search(q3, 1);
        assertEquals(mulFactor * collector.totalHits + NUM_EXTRA_DOCS / 2, hits4.totalHits);
      }

    } catch (Exception e) {
      // For easier debugging
      System.out.println("failed query: " + q1);
      throw e;
    }

    // System.out.println("Total hits:"+tot);
  }
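// For reference (an assumption about the pre-5.0 Lucene signature this test
// targets): TopFieldCollector.create(Sort sort, int numHits, boolean fillFields,
// boolean trackDocScores, boolean trackMaxScore, boolean docsScoredInOrder).
// The two create() calls above differ only in docsScoredInOrder, so
// CheckHits.checkEqual effectively verifies that in-order and out-of-order
// collection produce identical top hits.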
Example #4
  /**
   * Creates and submits a query to the Lucene engine.
   *
   * @param query The base query, using the query engine supported by Lucene.
   * @param sort A Lucene sort object; it may contain one or more sort criteria. If <tt>null</tt>,
   *     sort by hit score.
   * @param virtualWikiNames Comma separated list of virtual wiki names to search in, may be
   *     <tt>null</tt> to search all virtual wikis.
   * @param languages Comma separated list of language codes to search in, may be <tt>null</tt> or
   *     empty to search all languages.
   * @param indexes List of Lucene indexes (readers) to search.
   * @param context The context of the request.
   * @return The list of search results.
   * @throws IOException If the Lucene searchers encounter a problem reading the indexes.
   * @throws ParseException If the query is not valid.
   */
  private SearchResults search(
      String query,
      Sort sort,
      String virtualWikiNames,
      String languages,
      IndexReader[] indexes,
      XWikiContext context)
      throws IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(new MultiReader(indexes, true));

    // Enhance the base query with wiki names and languages.
    Query q = buildQuery(query, virtualWikiNames, languages);

    // Perform the actual search
    TopDocsCollector<? extends ScoreDoc> results;
    if (sort != null) {
      results = TopFieldCollector.create(sort, 1000, true, true, false, false);
    } else {
      results = TopScoreDocCollector.create(1000, false);
    }
    searcher.search(q, results);
    LOGGER.debug("query [{}] returned {} hits", q, results.getTotalHits());

    // Transform the raw Lucene search results into XWiki-aware results
    return new SearchResults(
        results, searcher, new com.xpn.xwiki.api.XWiki(context.getWiki(), context), context);
  }
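// Hypothetical internal call of the method above (names and values are
// illustrative, not part of XWiki's API): search all wikis for "hello" in
// English or French, newest first, using Lucene 3.x-style sort fields.
Sort byDate = new Sort(new SortField("date", SortField.STRING, true));
SearchResults results =
    search("hello", byDate, null /* all wikis */, "en,fr", indexes, context);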
 public ReRankCollector(
     int reRankDocs,
     int length,
     Query reRankQuery,
     double reRankWeight,
     SolrIndexSearcher.QueryCommand cmd,
     IndexSearcher searcher,
     Map<BytesRef, Integer> boostedPriority,
     boolean scale)
     throws IOException {
   super(null);
   this.reRankQuery = reRankQuery;
   this.reRankDocs = reRankDocs;
   this.length = length;
   this.boostedPriority = boostedPriority;
   this.scale = scale;
   Sort sort = cmd.getSort();
   if (sort == null) {
     this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), true);
   } else {
     sort = sort.rewrite(searcher);
     this.mainCollector =
         TopFieldCollector.create(
             sort, Math.max(this.reRankDocs, length), false, true, true, true);
   }
   this.searcher = searcher;
   this.reRankWeight = reRankWeight;
 }
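// The Math.max(reRankDocs, length) sizing above guarantees the first pass
// gathers enough candidates for rescoring. The same two-phase idea in plain
// Lucene (a sketch assuming the 4.10+ QueryRescorer API; mainQuery, reRankQuery,
// reRankWeight, reRankDocs and length are supplied by the caller):
TopDocs firstPass = searcher.search(mainQuery, Math.max(reRankDocs, length));
TopDocs reRanked =
    QueryRescorer.rescore(searcher, firstPass, reRankQuery, reRankWeight, reRankDocs);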
    @Override
    public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
        throws IOException {
      final String field;
      final String term;
      if (isParentHit(hitContext.hit())) {
        field = ParentFieldMapper.NAME;
        term = Uid.createUid(hitContext.hit().type(), hitContext.hit().id());
      } else if (isChildHit(hitContext.hit())) {
        field = UidFieldMapper.NAME;
        SearchHitField parentField = hitContext.hit().field(ParentFieldMapper.NAME);
        if (parentField != null) {
          term = parentField.getValue();
        } else {
          SingleFieldsVisitor fieldsVisitor = new SingleFieldsVisitor(ParentFieldMapper.NAME);
          hitContext.reader().document(hitContext.docId(), fieldsVisitor);
          if (fieldsVisitor.fields().isEmpty()) {
            return Lucene.EMPTY_TOP_DOCS;
          }
          term = (String) fieldsVisitor.fields().get(ParentFieldMapper.NAME).get(0);
        }
      } else {
        return Lucene.EMPTY_TOP_DOCS;
      }
      // Only include docs that have the current hit as parent
      Filter filter = new TermFilter(new Term(field, term));
      // Only include docs that have this inner hits type
      Filter typeFilter = documentMapper.typeFilter();

      if (size() == 0) {
        TotalHitCountCollector collector = new TotalHitCountCollector();
        context
            .searcher()
            .search(
                new XFilteredQuery(query, new AndFilter(Arrays.asList(filter, typeFilter))),
                collector);
        return new TopDocs(collector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
      } else {
        int topN = from() + size();
        TopDocsCollector topDocsCollector;
        if (sort() != null) {
          topDocsCollector =
              TopFieldCollector.create(sort(), topN, true, trackScores(), trackScores(), false);
        } else {
          topDocsCollector = TopScoreDocCollector.create(topN, false);
        }
        context
            .searcher()
            .search(
                new XFilteredQuery(query, new AndFilter(Arrays.asList(filter, typeFilter))),
                topDocsCollector);
        return topDocsCollector.topDocs(from(), size());
      }
    }
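// The size() == 0 branch above is a generic Lucene count-only pattern, not
// specific to inner hits: TotalHitCountCollector skips scoring and heap
// maintenance entirely. Minimal sketch:
TotalHitCountCollector counter = new TotalHitCountCollector();
searcher.search(query, counter);
int totalHits = counter.getTotalHits();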
Example #7
  private PossiblyLimitedTopDocs getTopDocs(Query query, Sort sort) throws IOException {
    final TopFieldCollector topCollector =
        TopFieldCollector.create(sort, maxHits, true, false, false, false);
    final Counter clock = Counter.newCounter(true);
    final int waitMillis = 1000;
    // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector?
    final TimeLimitingCollector collector =
        new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis / waitMillis);
    collector.setBaseline(0);
    final Thread counterThread =
        new Thread() {
          @Override
          public void run() {
            final long startTime = System.currentTimeMillis();
            while (true) {
              final long runTimeMillis = System.currentTimeMillis() - startTime;
              if (runTimeMillis > maxSearchTimeMillis) {
                // make sure there's no lingering thread for too long
                return;
              }
              clock.addAndGet(1);
              try {
                Thread.sleep(waitMillis);
              } catch (InterruptedException e) {
                throw new RuntimeException(e);
              }
            }
          }
        };
    counterThread.setName("LuceneSearchTimeoutThread");
    counterThread.start();

    boolean timeLimitActivated = false;
    try {
      indexSearcher.search(query, collector);
    } catch (TimeLimitingCollector.TimeExceededException e) {
      timeLimitActivated = true;
    }
    return new PossiblyLimitedTopDocs(topCollector.topDocs(), timeLimitActivated);
  }
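// The TODO above could be resolved with Lucene's built-in timer instead of a
// hand-rolled thread (a sketch, assuming the shared global counter available
// since Lucene 3.5; its TimerThread ticks the counter in milliseconds, so the
// allowed ticks equal the allowed milliseconds):
TimeLimitingCollector tlc = new TimeLimitingCollector(
    topCollector, TimeLimitingCollector.getGlobalCounter(), maxSearchTimeMillis);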
    @Override
    public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext)
        throws IOException {
      Filter rawParentFilter;
      if (parentObjectMapper == null) {
        rawParentFilter = NonNestedDocsFilter.INSTANCE;
      } else {
        rawParentFilter = parentObjectMapper.nestedTypeFilter();
      }
      FixedBitSetFilter parentFilter =
          context.fixedBitSetFilterCache().getFixedBitSetFilter(rawParentFilter);
      Filter childFilter = context.filterCache().cache(childObjectMapper.nestedTypeFilter());
      Query q =
          new XFilteredQuery(
              query, new NestedChildrenFilter(parentFilter, childFilter, hitContext));

      if (size() == 0) {
        TotalHitCountCollector collector = new TotalHitCountCollector();
        context.searcher().search(q, collector);
        return new TopDocs(collector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
      } else {
        int topN = from() + size();
        TopDocsCollector topDocsCollector;
        if (sort() != null) {
          try {
            topDocsCollector =
                TopFieldCollector.create(sort(), topN, true, trackScores(), trackScores(), true);
          } catch (IOException e) {
            throw ExceptionsHelper.convertToElastic(e);
          }
        } else {
          topDocsCollector = TopScoreDocCollector.create(topN, true);
        }
        context.searcher().search(q, topDocsCollector);
        return topDocsCollector.topDocs(from(), size());
      }
    }
  /**
   * Accumulates groups for the BlockJoinQuery specified by its slot.
   *
   * @param slot Search query's slot
   * @param offset Parent docs offset
   * @param maxDocsPerGroup Upper bound on the number of documents per group
   * @param withinGroupOffset Offset within each group of child docs
   * @param withinGroupSort Sort criteria within groups
   * @param fillSortFields Specifies whether to add sort fields or not
   * @return TopGroups for the query specified by slot
   * @throws IOException if there is a low-level I/O error
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private TopGroups<Integer> accumulateGroups(
      int slot,
      int offset,
      int maxDocsPerGroup,
      int withinGroupOffset,
      Sort withinGroupSort,
      boolean fillSortFields)
      throws IOException {
    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
    final FakeScorer fakeScorer = new FakeScorer();

    int totalGroupedHitCount = 0;
    // System.out.println("slot=" + slot);

    for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
      final OneGroup og = sortedGroups[groupIDX];
      final int numChildDocs;
      if (slot == -1 || slot >= og.counts.length) {
        numChildDocs = 0;
      } else {
        numChildDocs = og.counts[slot];
      }

      // Bound the number of documents per group to avoid redundant memory allocation
      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
      // System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG="
      // + maxDocsPerGroup);

      // At this point we hold all docs within each group, unsorted; we now sort them:
      final TopDocsCollector<?> collector;
      if (withinGroupSort == null) {
        // System.out.println("sort by score");
        // Sort by score
        if (!trackScores) {
          throw new IllegalArgumentException(
              "cannot sort by relevance within group: trackScores=false");
        }
        collector = TopScoreDocCollector.create(numDocsInGroup, true);
      } else {
        // Sort by fields
        collector =
            TopFieldCollector.create(
                withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
      }

      collector.setScorer(fakeScorer);
      collector.setNextReader(og.readerContext);
      for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
        // System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
        final int doc = og.docs[slot][docIDX];
        fakeScorer.doc = doc;
        if (trackScores) {
          fakeScorer.score = og.scores[slot][docIDX];
        }
        collector.collect(doc);
      }
      totalGroupedHitCount += numChildDocs;

      final Object[] groupSortValues;

      if (fillSortFields) {
        groupSortValues = new Object[comparators.length];
        for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
          groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
        }
      } else {
        groupSortValues = null;
      }

      final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

      groups[groupIDX - offset] =
          new GroupDocs<>(
              og.score,
              topDocs.getMaxScore(),
              numChildDocs,
              topDocs.scoreDocs,
              og.doc,
              groupSortValues);
    }

    return new TopGroups<>(
        new TopGroups<>(
            sort.getSort(),
            withinGroupSort == null ? null : withinGroupSort.getSort(),
            0,
            totalGroupedHitCount,
            groups,
            maxScore),
        totalHitCount);
  }
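// Caller-side sketch for the collector above (assumes Lucene 4.x block-join
// APIs and an existing ToParentBlockJoinQuery named childJoinQuery):
ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(
    Sort.RELEVANCE, 10 /* numParentHits */, true /* trackScores */, false /* trackMaxScore */);
searcher.search(childJoinQuery, c);
TopGroups<Integer> groups = c.getTopGroups(
    childJoinQuery, null /* withinGroupSort: relevance */, 0 /* offset */,
    5 /* maxDocsPerGroup */, 0 /* withinGroupOffset */, true /* fillSortFields */);
// trackScores is true because withinGroupSort is null; otherwise the method
// above throws "cannot sort by relevance within group: trackScores=false".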
  /**
   * Retrieve suggestions, specifying whether all terms must match ({@code allTermsRequired}) and
   * whether the hits should be highlighted ({@code doHighlight}).
   */
  public List<LookupResult> lookup(
      CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {

    if (searcherMgr == null) {
      throw new IllegalStateException("suggester was not built");
    }

    final BooleanClause.Occur occur;
    if (allTermsRequired) {
      occur = BooleanClause.Occur.MUST;
    } else {
      occur = BooleanClause.Occur.SHOULD;
    }

    BooleanQuery query;
    Set<String> matchedTokens = new HashSet<>();
    String prefixToken = null;

    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
      // long t0 = System.currentTimeMillis();
      ts.reset();
      final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      String lastToken = null;
      query = new BooleanQuery();
      int maxEndOffset = -1;
      while (ts.incrementToken()) {
        if (lastToken != null) {
          matchedTokens.add(lastToken);
          query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
        }
        lastToken = termAtt.toString();
        if (lastToken != null) {
          maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
        }
      }
      ts.end();

      if (lastToken != null) {
        Query lastQuery;
        if (maxEndOffset == offsetAtt.endOffset()) {
          // Use PrefixQuery (or the ngram equivalent) when
          // there were no trailing discarded chars in the
          // string (e.g. whitespace), so that if the query does
          // not end with a space we show prefix matches for
          // that token:
          lastQuery = getLastTokenQuery(lastToken);
          prefixToken = lastToken;
        } else {
          // Use TermQuery for an exact match if there were
          // trailing discarded chars (e.g. whitespace), so
          // that if the query ends with a space we only show
          // exact matches for that term:
          matchedTokens.add(lastToken);
          lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
        }
        if (lastQuery != null) {
          query.add(lastQuery, occur);
        }
      }
    }

    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:

    // System.out.println("INFIX query=" + query);

    Query finalQuery = finishQuery(query, allTermsRequired);

    // System.out.println("finalQuery=" + query);

    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);

    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    IndexSearcher searcher = searcherMgr.acquire();
    List<LookupResult> results = null;
    try {
      // System.out.println("got searcher=" + searcher);
      searcher.search(finalQuery, c2);

      TopFieldDocs hits = (TopFieldDocs) c.topDocs();

      // Slower way if postings are not pre-sorted by weight:
      // hits = searcher.search(query, null, num, SORT);
      results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
      searcherMgr.release(searcher);
    }

    // System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
    // System.out.println(results);

    return results;
  }
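// Hypothetical usage (assumes an already-built AnalyzingInfixSuggester named
// suggester; LookupResult exposes key and value as public fields):
List<LookupResult> hits =
    suggester.lookup("luc", 5, true /* allTermsRequired */, true /* doHighlight */);
for (LookupResult hit : hits) {
  System.out.println(hit.key + " (weight=" + hit.value + ")");
}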