/**
 * Runs a full-text search over the Lucene index stored in {@code .//Index} and adds every
 * proposal returned for a matching product to the proposal controller's list.
 *
 * <p>The query text is parsed against the {@code name} field with a Russian analyzer and at most
 * 10 documents are retrieved. Parse and I/O failures are printed to stderr and are not
 * propagated; the completion message is added in either case.
 *
 * <p>The searcher and the directory are always released, even when parsing, searching or
 * loading a document fails.
 *
 * @param searchString query text in Lucene query-parser syntax
 * @throws IOException declared for callers; I/O errors raised inside this method are caught here
 */
public void startSearch(String searchString) throws IOException {
  try {
    Directory directory = FSDirectory.open(new File(".//Index")); // where the index lives
    try {
      IndexSearcher is = new IndexSearcher(directory); // search object
      try {
        // search field + analyzer
        QueryParser parser =
            new QueryParser(Version.LUCENE_31, "name", new RussianAnalyzer(Version.LUCENE_31));
        Query query = parser.parse(searchString); // what we are looking for

        // run the search, limited to the 10 best documents
        TopDocs results = is.search(query, null, 10);

        // getMaxScore - best score among the hits, totalHits - number of documents found
        System.out.println(
            "getMaxScore()=" + results.getMaxScore() + " totalHits=" + results.totalHits);

        for (ScoreDoc hit : results.scoreDocs) {
          Document doc = is.doc(hit.doc); // load the stored document by its internal id

          for (Proposal proposal :
              proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) {
            proposalController.getProposalList().add(proposal);
            _log.info(
                "Предложение найдено:"
                    + proposal.getRecid().toString()
                    + ",Товар: "
                    + doc.get("recid")
                    + ", "
                    + doc.get("name"));
          }
        }
      } finally {
        // the searcher holds an open reader on the index; release it on every path
        is.close();
      }
    } finally {
      directory.close();
    }
  } catch (ParseException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
  addMessage("Поиск выполнен");
}
  /**
   * Indexes {@code doc} on its own in a fresh directory, runs {@code query} against it with the
   * given slop, and asserts the number of hits.
   *
   * @param doc the only document placed in the index
   * @param query phrase query to run; its slop is overwritten with {@code slop}
   * @param slop slop value applied to the query before searching
   * @param expectedNumResults expected value of {@code totalHits}
   * @return the maximum score reported for the search
   * @throws Exception if indexing, searching or closing fails
   */
  private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
      throws Exception {
    query.setSlop(slop);

    // Build a one-document index.
    Directory dir = newDirectory();
    MockAnalyzer whitespaceAnalyzer = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir, whitespaceAnalyzer);
    indexWriter.addDocument(doc);

    // Search it through a reader obtained from the still-open writer.
    IndexReader indexReader = indexWriter.getReader();
    IndexSearcher indexSearcher = newSearcher(indexReader);
    TopDocs hits = indexSearcher.search(query, null, 10);

    String failureMessage =
        "slop: " + slop + "  query: " + query + "  doc: " + doc + "  Wrong number of hits";
    assertEquals(failureMessage, expectedNumResults, hits.totalHits);

    // Release everything in the same order as before: writer, searcher, reader, directory.
    indexWriter.close();
    indexSearcher.close();
    indexReader.close();
    dir.close();

    return hits.getMaxScore();
  }
Пример #3
0
  /**
   * Tries to add new documents to hitDocs. Ensures that the hit numbered <code>min</code> has been
   * retrieved.
   *
   * <p>The search is re-run asking for twice as many hits as are needed, and only the hits that
   * are not yet cached are appended to <code>hitDocs</code>. Scores are scaled down so that the
   * best one is 1.0 whenever the reported maximum score exceeds 1.0.
   *
   * @param min index of the hit that must be available afterwards; raised to the current cache
   *     size when it is smaller
   * @throws IOException if the underlying search fails
   */
  private final void getMoreDocs(int min) throws IOException {
    // Never ask for fewer hits than are already cached.
    if (hitDocs.size() > min) {
      min = hitDocs.size();
    }

    int n = min * 2; // double # retrieved
    TopDocs topDocs =
        (sort == null)
            ? searcher.search(weight, filter, n)
            : searcher.search(weight, filter, n, sort);

    length = topDocs.totalHits;
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;

    // Normalization factor: only applied when the best score is above 1.0.
    float scoreNorm = 1.0f;

    if (length > 0 && topDocs.getMaxScore() > 1.0f) {
      scoreNorm = 1.0f / topDocs.getMaxScore();
    }

    // Default position in scoreDocs of the first hit that is not cached yet.
    int start = hitDocs.size() - nDeletedHits;

    // any new deletions?
    int nDels2 = countDeletions(searcher);
    debugCheckedForDeletions = false;
    if (nDeletions < 0 || nDels2 > nDeletions) {
      // either we cannot count deletions, or some "previously valid hits" might have been deleted,
      // so find exact start point
      nDeletedHits = 0;
      debugCheckedForDeletions = true;
      // Walk the cached hits and the fresh results side by side: a cached id that does not match
      // the current fresh result is counted as deleted, a match advances the fresh-result cursor.
      int i2 = 0;
      for (int i1 = 0; i1 < hitDocs.size() && i2 < scoreDocs.length; i1++) {
        int id1 = ((HitDoc) hitDocs.get(i1)).id;
        int id2 = scoreDocs[i2].doc;
        if (id1 == id2) {
          i2++;
        } else {
          nDeletedHits++;
        }
      }
      start = i2;
    }

    // Append only the results that are actually present in scoreDocs.
    int end = scoreDocs.length < length ? scoreDocs.length : length;
    // The reported length also counts the cached hits that were found to be deleted.
    length += nDeletedHits;
    for (int i = start; i < end; i++) {
      hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm, scoreDocs[i].doc));
    }

    // Remember the deletion count seen by this call for the next comparison.
    nDeletions = nDels2;
  }
Пример #4
0
  /**
   * Runs {@code query} and stores its results in the cache under an id.
   *
   * @param query query to run; when {@code null} nothing is searched or cached (the caller treats
   *     that as the default "all docs" search)
   * @param reuseId id to store the results under; a random UUID is generated when it is
   *     {@code null} or empty
   * @param maxResults requested result limit; values that are non-positive or above
   *     {@code MAX_RESULTS} are replaced by {@code MAX_RESULTS}
   * @return the id the results were (or would have been) cached under
   * @throws IOException if the search fails
   */
  public String cacheSearch(Query query, String reuseId, int maxResults) throws IOException {
    final boolean limitInRange = maxResults > 0 && maxResults <= MAX_RESULTS;
    final int limit = limitInRange ? maxResults : MAX_RESULTS;

    final String id;
    if (reuseId == null || reuseId.length() == 0) {
      id = UUID.randomUUID().toString();
    } else {
      id = reuseId;
    }

    // A null query is an All Docs search, which is the default and needs no cache entry.
    if (query == null) {
      return id;
    }

    TopDocs found = search(query, limit);
    // Cap the reported hit count at the limit that was applied.
    found.totalHits = Math.min(limit, found.totalHits);
    cache.put(id, found);
    return id;
  }
 /**
  * Asserts that two {@code TopDocs} are equal: same total hit count, same maximum score, same
  * number of returned hits, and the same doc id and score at every position.
  *
  * @param actual the result under test
  * @param expected the reference result
  */
 static void assertTopDocs(TopDocs actual, TopDocs expected) {
   assertThat(
       "actual.totalHits != expected.totalHits", actual.totalHits, equalTo(expected.totalHits));
   assertThat(
       "actual.getMaxScore() != expected.getMaxScore()",
       actual.getMaxScore(),
       equalTo(expected.getMaxScore()));
   // Compare against the expected array: checking actual against itself can never fail and
   // would let the loop below run off the end of a shorter expected array.
   assertThat(
       "actual.scoreDocs.length != expected.scoreDocs.length",
       actual.scoreDocs.length,
       equalTo(expected.scoreDocs.length));
   for (int i = 0; i < actual.scoreDocs.length; i++) {
     ScoreDoc actualHit = actual.scoreDocs[i];
     ScoreDoc expectedHit = expected.scoreDocs[i];
     assertThat("actualHit.doc != expectedHit.doc", actualHit.doc, equalTo(expectedHit.doc));
     assertThat(
         "actualHit.score != expectedHit.score", actualHit.score, equalTo(expectedHit.score));
   }
 }
  /**
   * Rescales every hit's score in place by dividing it by the current maximum score.
   *
   * <p>A hit whose score differs from the value recorded for its doc id in {@code scoreMap} gets
   * 1 added to the scaled score; a hit with no entry or with an unchanged score does not. The
   * largest resulting score is stored back as the new maximum score.
   *
   * @param topDocs hits to rescale; modified in place
   * @param scoreMap previously recorded score per doc id
   */
  private void scaleScores(TopDocs topDocs, Map<Integer, Float> scoreMap) {
    final float divisor = topDocs.getMaxScore();
    final ScoreDoc[] hits = topDocs.scoreDocs;
    float highest = -Float.MAX_VALUE;

    for (int i = 0; i < hits.length; i++) {
      final ScoreDoc hit = hits[i];
      final float raw = hit.score;
      final Float recorded = scoreMap.get(hit.doc);

      final boolean changed = recorded != null && raw != recorded.floatValue();
      if (changed) {
        hit.score = (raw / divisor) + 1;
      } else {
        hit.score = raw / divisor;
      }

      // Plain comparison on purpose: a NaN score must not replace the running maximum.
      if (hit.score > highest) {
        highest = hit.score;
      }
    }

    assert (highest <= 2);
    topDocs.setMaxScore(highest);
  }
 /**
  * Truncates the hit list at the first position (100 or later) whose score is within 0.5% of the
  * top score of the hit 100 places before it.
  *
  * <p>Lists with 100 hits or fewer are left untouched, as are lists where no such position
  * exists.
  *
  * @param docs hits to prune; {@code scoreDocs} is replaced by a shorter array when pruning
  *     applies
  * @throws IOException declared by the signature; nothing in the body raises it
  */
 private void pruneSimilar(TopDocs docs) throws IOException {
   final ScoreDoc[] hits = docs.scoreDocs;
   final int total = hits.length;
   if (total == 0) {
     return;
   }

   final double threshold = 0.005 * hits[0].score;
   int keep = total;
   for (int lead = 100; lead < total; lead++) {
     // Score drop across a window of 100 hits ending at "lead".
     float drop = hits[lead - 100].score - hits[lead].score;
     if (drop < threshold) {
       keep = lead;
       break;
     }
   }

   if (keep < total) {
     docs.scoreDocs = ArrayUtils.subarray(docs.scoreDocs, 0, keep);
   }
 }
Пример #8
0
  /**
   * Randomized check of {@code JoinUtil.createJoinQuery}: repeatedly builds a random index, runs
   * random join queries against it and compares the matched documents, the top hits and their
   * scores with independently computed expectations.
   *
   * @param multipleValuesPerDocument passed to the context creation and to the join query
   * @param maxIndexIter number of indexes to build
   * @param maxSearchIter number of join queries to run against each index
   * @param numberOfDocumentsToIndex passed to the context creation
   * @throws Exception if indexing or searching fails
   */
  private void executeRandomJoin(
      boolean multipleValuesPerDocument,
      int maxIndexIter,
      int maxSearchIter,
      int numberOfDocumentsToIndex)
      throws Exception {
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
      if (VERBOSE) {
        System.out.println("indexIter=" + indexIter);
      }
      // Fresh directory and writer for every index iteration.
      Directory dir = newDirectory();
      RandomIndexWriter w =
          new RandomIndexWriter(
              random(),
              dir,
              newIndexWriterConfig(
                      TEST_VERSION_CURRENT,
                      new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
                  .setMergePolicy(newLogMergePolicy()));
      final boolean scoreDocsInOrder = TestJoinUtil.random().nextBoolean();
      IndexIterationContext context =
          createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

      // The reader is taken from the writer before the writer is closed.
      IndexReader topLevelReader = w.getReader();
      w.close();
      for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
        if (VERBOSE) {
          System.out.println("searchIter=" + searchIter);
        }
        IndexSearcher indexSearcher = newSearcher(topLevelReader);

        // Pick one of the indexed values at random, together with its join direction.
        int r = random().nextInt(context.randomUniqueValues.length);
        boolean from = context.randomFrom[r];
        String randomValue = context.randomUniqueValues[r];
        FixedBitSet expectedResult =
            createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);

        final Query actualQuery = new TermQuery(new Term("value", randomValue));
        if (VERBOSE) {
          System.out.println("actualQuery=" + actualQuery);
        }
        final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
        if (VERBOSE) {
          System.out.println("scoreMode=" + scoreMode);
        }

        // The join field names are swapped depending on the direction.
        final Query joinQuery;
        if (from) {
          joinQuery =
              JoinUtil.createJoinQuery(
                  "from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
        } else {
          joinQuery =
              JoinUtil.createJoinQuery(
                  "to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
        }
        if (VERBOSE) {
          System.out.println("joinQuery=" + joinQuery);
        }

        // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd
        // be also testing TopDocsCollector...
        final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
        final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, false);
        // Wrapping collector: records every matching doc in the bit set (using the top-level doc
        // id) and forwards all callbacks to the top-docs collector.
        indexSearcher.search(
            joinQuery,
            new Collector() {

              int docBase;

              @Override
              public void collect(int doc) throws IOException {
                actualResult.set(doc + docBase);
                topScoreDocCollector.collect(doc);
              }

              @Override
              public void setNextReader(AtomicReaderContext context) {
                docBase = context.docBase;
                topScoreDocCollector.setNextReader(context);
              }

              @Override
              public void setScorer(Scorer scorer) throws IOException {
                topScoreDocCollector.setScorer(scorer);
              }

              // NOTE(review): this returns the flag named scoreDocsInOrder from a method that
              // reports whether out-of-order docs are accepted - confirm the polarity is intended.
              @Override
              public boolean acceptsDocsOutOfOrder() {
                return scoreDocsInOrder;
              }
            });
        // Asserting bit set...
        if (VERBOSE) {
          System.out.println("expected cardinality:" + expectedResult.cardinality());
          DocIdSetIterator iterator = expectedResult.iterator();
          for (int doc = iterator.nextDoc();
              doc != DocIdSetIterator.NO_MORE_DOCS;
              doc = iterator.nextDoc()) {
            System.out.println(
                String.format(
                    Locale.ROOT,
                    "Expected doc[%d] with id value %s",
                    doc,
                    indexSearcher.doc(doc).get("id")));
          }
          System.out.println("actual cardinality:" + actualResult.cardinality());
          iterator = actualResult.iterator();
          for (int doc = iterator.nextDoc();
              doc != DocIdSetIterator.NO_MORE_DOCS;
              doc = iterator.nextDoc()) {
            System.out.println(
                String.format(
                    Locale.ROOT,
                    "Actual doc[%d] with id value %s",
                    doc,
                    indexSearcher.doc(doc).get("id")));
          }
        }
        assertEquals(expectedResult, actualResult);

        // Asserting TopDocs...
        TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
        TopDocs actualTopDocs = topScoreDocCollector.topDocs();
        assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
        assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
        // Scores and per-position doc ids are not compared when the score mode is None.
        if (scoreMode == ScoreMode.None) {
          continue;
        }

        assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
        for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
          if (VERBOSE) {
            System.out.printf(
                Locale.ENGLISH,
                "Expected doc: %d | Actual doc: %d\n",
                expectedTopDocs.scoreDocs[i].doc,
                actualTopDocs.scoreDocs[i].doc);
            System.out.printf(
                Locale.ENGLISH,
                "Expected score: %f | Actual score: %f\n",
                expectedTopDocs.scoreDocs[i].score,
                actualTopDocs.scoreDocs[i].score);
          }
          assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
          assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
          // The explanation for the hit must report exactly the same score.
          Explanation explanation =
              indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
          assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
        }
      }
      topLevelReader.close();
      dir.close();
    }
  }
Пример #9
0
  /**
   * Searches the index under {@code /home/hadoop/search/index} and writes one tab-separated line
   * per hit to a result file under {@code /home/hadoop/search/result/}.
   *
   * <p>Spaces are removed from {@code keyword} first. When the remaining text is empty, a single
   * term query on the {@code Tag} field is run and the file is named {@code Total-<time>.txt}.
   * Otherwise the text is tokenized and every token is searched in every field listed in
   * {@code str} (all clauses optional), and the file is named {@code <keyword>-<time>.txt}.
   *
   * <p>Each output line holds {@code Review_id}, {@code Hotel_id}, the re-tokenized
   * {@code Review_Content} (tokens separated and followed by a space) and {@code id}. Nothing is
   * done when {@code str} is empty. The result file, the index reader and the directory are closed
   * on every path, including the early return for an empty result and any failure.
   *
   * @param keyword text to search for
   * @param str names of the index fields to search in
   * @throws Exception if the index cannot be read or the result file cannot be written
   */
  public void search(String keyword, String[] str) throws Exception {
    if (str.length == 0) {
      return;
    }

    String strtime = new getDate().GetNowDate();
    String searchStr = keyword.replace(" ", "");
    String filePath = "/home/hadoop/search/index";

    // Pick the file name before opening anything, so that only one writer is ever created.
    String resultName = searchStr.equals("") ? "Total-" + strtime : searchStr + "-" + strtime;

    // NOTE(review): FileWriter uses the platform default charset - confirm that is what the
    // consumers of these files expect.
    try (FileWriter fileWriter =
            new FileWriter("/home/hadoop/search/result/" + resultName + ".txt");
        Directory dir = FSDirectory.open(new File(filePath));
        IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      Analyzer analyzer = new IKAnalyzer(true);

      TopDocs topdocs;
      if (searchStr.equals("")) {
        TermQuery query = new TermQuery(new Term("Tag", searchStr));
        topdocs = searcher.search(query, 19999999);
      } else {
        BooleanQuery query = new BooleanQuery();
        // The token stream is only created when it is consumed, and is always closed.
        try (TokenStream stream = analyzer.tokenStream("", new StringReader(searchStr))) {
          CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            String token = termAttribute.toString();
            for (int i = 0; i < str.length; i++) {
              // debug trace kept from the original implementation
              System.out.println("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
              System.out.println(str[i]);
              query.add(new TermQuery(new Term(str[i], token)), BooleanClause.Occur.SHOULD);
            }
          }
        }
        topdocs = searcher.search(query, 19999999);
      }

      ScoreDoc[] scoreDocs = topdocs.scoreDocs;
      if (scoreDocs == null || scoreDocs.length < 1) {
        return; // resources are released by the enclosing try-with-resources
      }

      System.out.println("查询结果总数---" + topdocs.totalHits + "最大的评分--" + topdocs.getMaxScore());
      for (int i = 0; i < scoreDocs.length; i++) {
        Document document = searcher.doc(scoreDocs[i].doc);

        // Re-tokenize the stored review text into space-separated tokens.
        StringBuilder outstr = new StringBuilder();
        try (TokenStream stream =
            analyzer.tokenStream("", new StringReader(document.get("Review_Content")))) {
          CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            outstr.append(termAttribute.toString()).append(' ');
          }
        }

        fileWriter.write(
            document.get("Review_id")
                + "\t"
                + document.get("Hotel_id")
                + "\t"
                + outstr
                + "\t"
                + document.get("id")
                + "\n");
      }
      // No explicit flush: closing the writer flushes it, and flushing after close would throw.
    }
  }
  /**
   * Accumulates groups for the BlockJoinQuery specified by its slot.
   *
   * <p>For every sorted parent group from {@code offset} onwards, the child docs recorded for the
   * slot are replayed through a fresh top-docs collector so that they come out sorted, either by
   * score or by {@code withinGroupSort}.
   *
   * @param slot Search query's slot
   * @param offset Parent docs offset
   * @param maxDocsPerGroup Upper bound of documents per group number
   * @param withinGroupOffset Offset within each group of child docs
   * @param withinGroupSort Sort criteria within groups
   * @param fillSortFields Specifies whether to add sort fields or not
   * @return TopGroups for the query specified by slot
   * @throws IOException if there is a low-level I/O error
   * @throws IllegalArgumentException if sorting by score is requested while scores are not tracked
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private TopGroups<Integer> accumulateGroups(
      int slot,
      int offset,
      int maxDocsPerGroup,
      int withinGroupOffset,
      Sort withinGroupSort,
      boolean fillSortFields)
      throws IOException {
    final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
    // Stand-in scorer that feeds the recorded doc id and score to the collector.
    final FakeScorer fakeScorer = new FakeScorer();

    int totalGroupedHitCount = 0;
    // System.out.println("slot=" + slot);

    for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
      final OneGroup og = sortedGroups[groupIDX];
      final int numChildDocs;
      // A slot of -1 or one beyond this group's counts means no child docs for this group.
      if (slot == -1 || slot >= og.counts.length) {
        numChildDocs = 0;
      } else {
        numChildDocs = og.counts[slot];
      }

      // Number of documents in group should be bounded to prevent redundant memory allocation
      // (and is at least 1, so the collector is never created with a size of 0).
      final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
      // System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG="
      // + maxDocsPerGroup);

      // At this point we hold all docs w/ in each group,
      // unsorted; we now sort them:
      final TopDocsCollector<?> collector;
      if (withinGroupSort == null) {
        // System.out.println("sort by score");
        // Sort by score
        if (!trackScores) {
          throw new IllegalArgumentException(
              "cannot sort by relevance within group: trackScores=false");
        }
        collector = TopScoreDocCollector.create(numDocsInGroup, true);
      } else {
        // Sort by fields
        collector =
            TopFieldCollector.create(
                withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
      }

      // Replay the recorded child docs of this group through the collector.
      collector.setScorer(fakeScorer);
      collector.setNextReader(og.readerContext);
      for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
        // System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
        final int doc = og.docs[slot][docIDX];
        fakeScorer.doc = doc;
        if (trackScores) {
          fakeScorer.score = og.scores[slot][docIDX];
        }
        collector.collect(doc);
      }
      totalGroupedHitCount += numChildDocs;

      final Object[] groupSortValues;

      // Parent-level sort values are read from the comparators at the group's slot.
      if (fillSortFields) {
        groupSortValues = new Object[comparators.length];
        for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
          groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
        }
      } else {
        groupSortValues = null;
      }

      final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

      groups[groupIDX - offset] =
          new GroupDocs<>(
              og.score,
              topDocs.getMaxScore(),
              numChildDocs,
              topDocs.scoreDocs,
              og.doc,
              groupSortValues);
    }

    // The inner TopGroups carries the grouped results; the outer one adds totalHitCount.
    return new TopGroups<>(
        new TopGroups<>(
            sort.getSort(),
            withinGroupSort == null ? null : withinGroupSort.getSort(),
            0,
            totalGroupedHitCount,
            groups,
            maxScore),
        totalHitCount);
  }
    /**
     * Returns the top hits of the main collector after re-scoring the leading
     * <code>reRankDocs</code> of them with the re-rank query.
     *
     * <p>The combined score of a re-ranked hit is its first-pass score plus
     * <code>reRankWeight</code> times the second-pass score when the re-rank query matches it.
     * When <code>boostedPriority</code> is set, the re-scored hits are additionally sorted with a
     * <code>BoostedComp</code>. When <code>scale</code> is set, the returned scores are passed
     * through <code>scaleScores</code>.
     *
     * @param start not read by this implementation
     * @param howMany number of hits requested; lowered to the number of collected hits when
     *     fewer are available
     * @return the main collector's result unchanged when it has no hits, otherwise the re-scored
     *     result resized to <code>howMany</code> hits
     * @throws SolrException (BAD_REQUEST) wrapping any exception raised while re-ranking
     */
    public TopDocs topDocs(int start, int howMany) {

      try {

        // Collect at least reRankDocs hits so that there is something to re-rank.
        TopDocs mainDocs = mainCollector.topDocs(0, Math.max(reRankDocs, length));

        if (mainDocs.totalHits == 0 || mainDocs.scoreDocs.length == 0) {
          return mainDocs;
        }

        if (boostedPriority != null) {
          SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
          Map requestContext = null;
          if (info != null) {
            requestContext = info.getReq().getContext();
          }

          IntIntOpenHashMap boostedDocs =
              QueryElevationComponent.getBoostDocs(
                  (SolrIndexSearcher) searcher, boostedPriority, requestContext);

          // Keep the full first-pass list, and hand only its leading part to the rescorer.
          ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;
          ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];
          System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length);

          mainDocs.scoreDocs = reRankScoreDocs;

          // Taken before re-scoring; later handed to scaleScores.
          Map<Integer, Float> scoreMap = getScoreMap(mainDocs.scoreDocs, mainDocs.scoreDocs.length);

          TopDocs rescoredDocs =
              new QueryRescorer(reRankQuery) {
                @Override
                protected float combine(
                    float firstPassScore, boolean secondPassMatches, float secondPassScore) {
                  float score = firstPassScore;
                  if (secondPassMatches) {
                    score += reRankWeight * secondPassScore;
                  }
                  return score;
                }
              }.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);

          // Re-order the re-scored hits taking the boosted docs into account.
          Arrays.sort(
              rescoredDocs.scoreDocs,
              new BoostedComp(boostedDocs, mainDocs.scoreDocs, rescoredDocs.getMaxScore()));

          // Lower howMany if we've collected fewer documents.
          howMany = Math.min(howMany, mainScoreDocs.length);

          if (howMany == rescoredDocs.scoreDocs.length) {
            if (scale) {
              scaleScores(rescoredDocs, scoreMap);
            }
            return rescoredDocs; // Just return the rescoredDocs
          } else if (howMany > rescoredDocs.scoreDocs.length) {
            // We need to return more then we've reRanked, so create the combined page.
            ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
            System.arraycopy(
                mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length); // lay down the initial docs
            System.arraycopy(
                rescoredDocs.scoreDocs,
                0,
                scoreDocs,
                0,
                rescoredDocs.scoreDocs.length); // overlay the re-ranked docs.
            rescoredDocs.scoreDocs = scoreDocs;
            if (scale) {
              scaleScores(rescoredDocs, scoreMap);
            }
            return rescoredDocs;
          } else {
            // We've rescored more then we need to return.
            ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
            System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany);
            rescoredDocs.scoreDocs = scoreDocs;
            if (scale) {
              scaleScores(rescoredDocs, scoreMap);
            }
            return rescoredDocs;
          }

        } else {

          // Same flow as above, without the boosted-docs lookup and without the extra sort.
          ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;

          /*
           *  Create the array for the reRankScoreDocs.
           */
          ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];

          /*
           *  Copy the initial results into the reRankScoreDocs array.
           */
          System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length);

          mainDocs.scoreDocs = reRankScoreDocs;

          // Taken before re-scoring; later handed to scaleScores.
          Map<Integer, Float> scoreMap = getScoreMap(mainDocs.scoreDocs, mainDocs.scoreDocs.length);

          TopDocs rescoredDocs =
              new QueryRescorer(reRankQuery) {
                @Override
                protected float combine(
                    float firstPassScore, boolean secondPassMatches, float secondPassScore) {
                  float score = firstPassScore;
                  if (secondPassMatches) {
                    score += reRankWeight * secondPassScore;
                  }
                  return score;
                }
              }.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);

          // Lower howMany to return if we've collected fewer documents.
          howMany = Math.min(howMany, mainScoreDocs.length);

          if (howMany == rescoredDocs.scoreDocs.length) {
            if (scale) {
              scaleScores(rescoredDocs, scoreMap);
            }
            return rescoredDocs; // Just return the rescoredDocs
          } else if (howMany > rescoredDocs.scoreDocs.length) {

            // We need to return more then we've reRanked, so create the combined page.
            ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
            // lay down the initial docs
            System.arraycopy(mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length);
            // overlay the rescoreds docs
            System.arraycopy(
                rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length);
            rescoredDocs.scoreDocs = scoreDocs;
            if (scale) {
              assert (scoreMap != null);
              scaleScores(rescoredDocs, scoreMap);
            }
            return rescoredDocs;
          } else {
            // We've rescored more then we need to return.
            ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
            System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany);
            rescoredDocs.scoreDocs = scoreDocs;
            if (scale) {
              scaleScores(rescoredDocs, scoreMap);
            }
            return rescoredDocs;
          }
        }
      } catch (Exception e) {
        // Every failure, whatever its type, is reported to the client as a bad request.
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }
    }
Пример #12
0
  /**
   * Serializes {@code topDocs} to {@code out}, skipping the first {@code from} hits.
   *
   * <p>Layout: a boolean telling whether anything follows ({@code false} when {@code from} is
   * past the end of the hits); a boolean telling whether the result is a {@code TopFieldDocs};
   * total hits; max score; for field-sorted results the sort fields (optional name, sort type,
   * reverse flag); the number of hits written; then for every hit its sort values (field-sorted
   * results only, each prefixed by a one-byte type tag), doc id and score.
   *
   * @param out stream to write to
   * @param topDocs result to serialize
   * @param from number of leading hits to leave out
   * @throws IOException if writing fails or a sort value has an unsupported type
   */
  public static void writeTopDocs(StreamOutput out, TopDocs topDocs, int from) throws IOException {
    final ScoreDoc[] hits = topDocs.scoreDocs;
    final int hitsToWrite = hits.length - from;
    if (hitsToWrite < 0) {
      out.writeBoolean(false);
      return;
    }
    out.writeBoolean(true);

    // Header shared by both flavours.
    final boolean fieldSorted = topDocs instanceof TopFieldDocs;
    out.writeBoolean(fieldSorted);
    out.writeVInt(topDocs.totalHits);
    out.writeFloat(topDocs.getMaxScore());

    if (fieldSorted) {
      final SortField[] sortFields = ((TopFieldDocs) topDocs).fields;
      out.writeVInt(sortFields.length);
      for (int f = 0; f < sortFields.length; f++) {
        final SortField sortField = sortFields[f];
        final String fieldName = sortField.getField();
        out.writeBoolean(fieldName != null);
        if (fieldName != null) {
          out.writeString(fieldName);
        }
        if (sortField.getComparatorSource() == null) {
          writeSortType(out, sortField.getType());
        } else {
          writeSortType(
              out,
              ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource())
                  .reducedType());
        }
        out.writeBoolean(sortField.getReverse());
      }
    }

    out.writeVInt(hitsToWrite);
    // A negative "from" skips nothing, so start at 0 in that case.
    for (int i = Math.max(from, 0); i < hits.length; i++) {
      final ScoreDoc hit = hits[i];

      if (fieldSorted) {
        final Object[] sortValues = ((FieldDoc) hit).fields;
        out.writeVInt(sortValues.length);
        for (int v = 0; v < sortValues.length; v++) {
          final Object value = sortValues[v];
          // All handled types are final classes, so instanceof matches exactly one class each.
          if (value == null) {
            out.writeByte((byte) 0);
          } else if (value instanceof String) {
            out.writeByte((byte) 1);
            out.writeString((String) value);
          } else if (value instanceof Integer) {
            out.writeByte((byte) 2);
            out.writeInt((Integer) value);
          } else if (value instanceof Long) {
            out.writeByte((byte) 3);
            out.writeLong((Long) value);
          } else if (value instanceof Float) {
            out.writeByte((byte) 4);
            out.writeFloat((Float) value);
          } else if (value instanceof Double) {
            out.writeByte((byte) 5);
            out.writeDouble((Double) value);
          } else if (value instanceof Byte) {
            out.writeByte((byte) 6);
            out.writeByte((Byte) value);
          } else if (value instanceof Short) {
            out.writeByte((byte) 7);
            out.writeShort((Short) value);
          } else if (value instanceof Boolean) {
            out.writeByte((byte) 8);
            out.writeBoolean((Boolean) value);
          } else if (value instanceof BytesRef) {
            out.writeByte((byte) 9);
            out.writeBytesRef((BytesRef) value);
          } else {
            throw new IOException(
                "Can't handle sort field value of type [" + value.getClass() + "]");
          }
        }
      }

      out.writeVInt(hit.doc);
      out.writeFloat(hit.score);
    }
  }
Пример #13
0
  /**
   * Auxiliary method used by the {@link #merge} impls. A sort value of null is used to indicate
   * that docs should be sorted by score.
   *
   * <p>Every hit popped from the merge queue gets its {@code shardIndex} set to the position of
   * its shard in {@code shardHits}, including hits that are skipped because they fall before
   * {@code start}.
   *
   * @param sort sort used to order hits across shards, or {@code null} to order by score
   * @param start number of leading merged hits to skip
   * @param size maximum number of merged hits to return
   * @param shardHits per-shard results; entries with a {@code null} or empty {@code scoreDocs}
   *     contribute only to the total hit count
   * @return a {@link TopDocs} (or a {@link TopFieldDocs} when {@code sort} is non-null) holding
   *     the summed total hits, the requested window of merged hits and the largest max score among
   *     the shards that contributed hits ({@code NaN} when no shard contributed any)
   * @throws IOException not thrown directly here; presumably propagated from the merge queue
   *     setup
   */
  private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits)
      throws IOException {
    final PriorityQueue<ShardRef> queue;
    if (sort == null) {
      queue = new ScoreMergeSortQueue(shardHits);
    } else {
      queue = new MergeSortQueue(sort, shardHits);
    }

    int totalHitCount = 0;
    int availHitCount = 0;
    // Seed with negative infinity rather than Float.MIN_VALUE: MIN_VALUE is the smallest
    // *positive* float, so it would win over shard max scores that are zero or negative.
    float maxScore = Float.NEGATIVE_INFINITY;
    for (int shardIDX = 0; shardIDX < shardHits.length; shardIDX++) {
      final TopDocs shard = shardHits[shardIDX];
      // totalHits can be non-zero even if no hits were
      // collected, when searchAfter was used:
      totalHitCount += shard.totalHits;
      if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
        availHitCount += shard.scoreDocs.length;
        queue.add(new ShardRef(shardIDX));
        maxScore = Math.max(maxScore, shard.getMaxScore());
      }
    }

    // No shard contributed a hit, so there is no meaningful max score.
    if (availHitCount == 0) {
      maxScore = Float.NaN;
    }

    final ScoreDoc[] hits;
    if (availHitCount <= start) {
      // The requested window starts past the last available hit.
      hits = new ScoreDoc[0];
    } else {
      hits = new ScoreDoc[Math.min(size, availHitCount - start)];
      int requestedResultWindow = start + size;
      int numIterOnHits = Math.min(availHitCount, requestedResultWindow);
      int hitUpto = 0;
      while (hitUpto < numIterOnHits) {
        assert queue.size() > 0;
        ShardRef ref = queue.pop();
        final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
        hit.shardIndex = ref.shardIndex;
        // Hits before 'start' are consumed to advance the merge but are not returned.
        if (hitUpto >= start) {
          hits[hitUpto - start] = hit;
        }

        hitUpto++;

        if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
          // Not done with this shard's TopDocs yet:
          queue.add(ref);
        }
      }
    }

    if (sort == null) {
      return new TopDocs(totalHitCount, hits, maxScore);
    } else {
      return new TopFieldDocs(totalHitCount, hits, sort.getSort(), maxScore);
    }
  }
  /**
   * Combines the per-shard query results into one ordered array of hits.
   *
   * <p>When single-shard optimization is enabled and only one shard produced hits (or only one
   * result is present), that shard's hits are paged directly and each returned hit is tagged with
   * the shard's index. Otherwise the shard results are handed to {@code TopDocs.merge}, using the
   * sort fields of the first result (after ordering by {@code QUERY_RESULT_ORDERING}) when it
   * carries field-sorted docs, or score ordering when it does not.
   *
   * @param scrollSort Whether to ignore the from and sort all hits in each shard result. Only used
   *     for scroll search
   * @param resultsArr Shard result holder
   * @return the requested window of hits; {@code EMPTY_DOCS} when there are no results at all or
   *     the single contributing shard has nothing at the requested offset
   * @throws IOException declared by the merge call used on the multi-shard path
   */
  public ScoreDoc[] sortDocs(
      boolean scrollSort, AtomicArray<? extends QuerySearchResultProvider> resultsArr)
      throws IOException {
    List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> entries =
        resultsArr.asList();
    if (entries.isEmpty()) {
      return EMPTY_DOCS;
    }

    if (optimizeSingleShard) {
      QuerySearchResult soleResult = null;
      int soleShardIndex = -1;
      boolean singleShard = false;
      if (entries.size() == 1) {
        AtomicArray.Entry<? extends QuerySearchResultProvider> onlyEntry = entries.get(0);
        soleResult = onlyEntry.value.queryResult();
        soleShardIndex = onlyEntry.index;
        singleShard = true;
      } else {
        // Look for exactly one shard that actually returned hits.
        for (AtomicArray.Entry<? extends QuerySearchResultProvider> candidate : entries) {
          if (candidate.value.queryResult().topDocs().scoreDocs.length <= 0) {
            continue;
          }
          if (soleResult != null) {
            // A second shard has hits too, so a real merge is required.
            singleShard = false;
            break;
          }
          singleShard = true;
          soleResult = candidate.value.queryResult();
          soleShardIndex = candidate.index;
        }
      }

      if (singleShard) {
        final int offset = scrollSort ? 0 : soleResult.from();
        final ScoreDoc[] shardDocs = soleResult.topDocs().scoreDocs;
        if (shardDocs.length == 0 || shardDocs.length < offset) {
          return EMPTY_DOCS;
        }

        // Never read past the end of what the shard returned.
        final int pageLength = Math.min(soleResult.size(), shardDocs.length - offset);
        final ScoreDoc[] page = new ScoreDoc[pageLength];
        for (int pos = 0; pos < pageLength; pos++) {
          ScoreDoc hit = shardDocs[offset + pos];
          hit.shardIndex = soleShardIndex;
          page[pos] = hit;
        }
        return page;
      }
    }

    @SuppressWarnings("unchecked")
    AtomicArray.Entry<? extends QuerySearchResultProvider>[] ordered =
        entries.toArray(new AtomicArray.Entry[entries.size()]);
    Arrays.sort(ordered, QUERY_RESULT_ORDERING);
    final QuerySearchResultProvider first = ordered[0].value;
    final QuerySearchResult firstQuery = first.queryResult();

    // Field-sorted results carry their sort fields; plain results are merged by score.
    final TopDocs firstTopDocs = firstQuery.topDocs();
    final Sort sort =
        (firstTopDocs instanceof TopFieldDocs)
            ? new Sort(((TopFieldDocs) firstTopDocs).fields)
            : null;

    int topN = firstQuery.size();
    if (first.includeFetch()) {
      // Query and fetch ran in one go, so every shard already holds its fetched docs
      // (only "from" up to "size" of them); widen the window to cover all shards.
      topN *= ordered.length;
    }

    // Sized by the holder's length because each entry's 'index' is its slot in resultsArr.
    final TopDocs[] perSlotTopDocs = new TopDocs[resultsArr.length()];
    for (AtomicArray.Entry<? extends QuerySearchResultProvider> entry : ordered) {
      perSlotTopDocs[entry.index] = entry.value.queryResult().topDocs();
    }
    // TopDocs#merge can't deal with null shard TopDocs
    for (int slot = 0; slot < perSlotTopDocs.length; slot++) {
      if (perSlotTopDocs[slot] == null) {
        perSlotTopDocs[slot] = Lucene.EMPTY_TOP_DOCS;
      }
    }

    final int from = scrollSort ? 0 : firstQuery.from();
    return TopDocs.merge(sort, from, topN, perSlotTopDocs).scoreDocs;
  }