public void startSearch(String searchString) throws IOException {
  /*analyze(searchString);*/
  try {
    Directory directory = FSDirectory.open(new File(".//Index")); // where the index is located
    IndexSearcher is = new IndexSearcher(directory); // search object
    QueryParser parser =
        new QueryParser(
            Version.LUCENE_31,
            "name",
            new RussianAnalyzer(Version.LUCENE_31)); // search field + analyzer
    /* String str1 = "фотоаппарат"; // "camera"
       String str2 = "телевизор";   // "TV"
       String str3 = "SONY";
       String total = "(" + str1 + " OR " + str2 + ")" + " AND " + str3;
       System.out.println(total); */
    Query query = parser.parse(searchString); // what we are searching for
    TopDocs results = is.search(query, null, 10); // run the search, limited to 10 documents; results contains ...
    System.out.println(
        "getMaxScore()=" + results.getMaxScore() + " totalHits=" + results.totalHits);
    // getMaxScore() is the best (highest) score, totalHits is the number of documents found
    /*proposalController.getProposalList().clear();*/
    for (ScoreDoc hits : results.scoreDocs) { // walk the hits
      Document doc = is.doc(hits.doc); // fetch the document by its internal doc id
      for (Proposal proposal :
          proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) {
        proposalController.getProposalList().add(proposal);
        _log.info(
            "Proposal found:" + proposal.getRecid().toString()
                + ", product: " + doc.get("recid") + ", " + doc.get("name"));
      }
      /* System.out.println("doc=" + hits.doc + " score=" + hits.score); // print internal doc id + score
         addMessage(doc.get("id") + " | " + doc.get("recid") + " | " + doc.get("name")); // print the fields of the found document */
    }
    directory.close();
  } catch (ParseException e) {
    e.printStackTrace();
  } catch (IOException e) {
    e.printStackTrace();
  }
  addMessage("Search completed");
}
private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
    throws Exception {
  query.setSlop(slop);

  Directory ramDir = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(random, ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();

  IndexSearcher searcher = newSearcher(reader);
  TopDocs td = searcher.search(query, null, 10);
  // System.out.println("slop: " + slop + " query: " + query + " doc: " + doc
  //     + " Expecting number of hits: " + expectedNumResults + " maxScore=" + td.getMaxScore());
  assertEquals(
      "slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits",
      expectedNumResults,
      td.totalHits);

  // QueryUtils.check(query, searcher);
  writer.close();
  searcher.close();
  reader.close();
  ramDir.close();

  return td.getMaxScore();
}
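The helper above indexes a single document and asserts the hit count for a given slop. A minimal sketch of driving it, assuming the same Lucene 3.x-era test scaffolding; the field name "field" and the phrase terms are illustrative, not from the source:

// Hypothetical driver for checkPhraseQuery(); field name and terms are made up.
Document doc = new Document();
doc.add(new Field("field", "quick brown fox", Field.Store.NO, Field.Index.ANALYZED));

PhraseQuery query = new PhraseQuery();
query.add(new Term("field", "quick"));
query.add(new Term("field", "fox"));

checkPhraseQuery(doc, query, 0, 0); // exact phrase: "brown" is in the way, so no hit
float maxScore = checkPhraseQuery(doc, query, 1, 1); // slop 1 lets the terms sit one position apart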
/**
 * Tries to add new documents to hitDocs. Ensures that the hit numbered <code>min</code> has been
 * retrieved.
 */
private final void getMoreDocs(int min) throws IOException {
  if (hitDocs.size() > min) {
    min = hitDocs.size();
  }

  int n = min * 2; // double # retrieved
  TopDocs topDocs =
      (sort == null)
          ? searcher.search(weight, filter, n)
          : searcher.search(weight, filter, n, sort);

  length = topDocs.totalHits;
  ScoreDoc[] scoreDocs = topDocs.scoreDocs;

  float scoreNorm = 1.0f;

  if (length > 0 && topDocs.getMaxScore() > 1.0f) {
    scoreNorm = 1.0f / topDocs.getMaxScore();
  }

  int start = hitDocs.size() - nDeletedHits;

  // any new deletions?
  int nDels2 = countDeletions(searcher);
  debugCheckedForDeletions = false;
  if (nDeletions < 0 || nDels2 > nDeletions) {
    // either we cannot count deletions, or some "previously valid hits" might have been deleted,
    // so find the exact start point
    nDeletedHits = 0;
    debugCheckedForDeletions = true;
    int i2 = 0;
    for (int i1 = 0; i1 < hitDocs.size() && i2 < scoreDocs.length; i1++) {
      int id1 = ((HitDoc) hitDocs.get(i1)).id;
      int id2 = scoreDocs[i2].doc;
      if (id1 == id2) {
        i2++;
      } else {
        nDeletedHits++;
      }
    }
    start = i2;
  }

  int end = scoreDocs.length < length ? scoreDocs.length : length;
  length += nDeletedHits;
  for (int i = start; i < end; i++) {
    hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm, scoreDocs[i].doc));
  }

  nDeletions = nDels2;
}
public String cacheSearch(Query query, String reuseId, int maxResults) throws IOException {
  if ((maxResults <= 0) || (maxResults > MAX_RESULTS)) {
    maxResults = MAX_RESULTS;
  }

  String id =
      ((reuseId != null) && (reuseId.length() > 0)) ? reuseId : UUID.randomUUID().toString();

  if (query != null) // otherwise it's an All Docs search, which is the default
  {
    TopDocs docs = search(query, maxResults);
    docs.totalHits = Math.min(maxResults, docs.totalHits);
    cache.put(id, docs);
  }

  return id;
}
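A hedged usage sketch: pass null as reuseId to get a fresh entry under a random UUID, or pass an earlier id to refresh that entry in place. The cache field and MAX_RESULTS are whatever the surrounding class defines; the field name "name" and the queries are illustrative:

// Hypothetical caller; `cache` belongs to the surrounding class.
String id = cacheSearch(new TermQuery(new Term("name", "sony")), null, 100); // fresh entry
TopDocs cached = cache.get(id); // later lookups reuse the capped TopDocs
id = cacheSearch(new TermQuery(new Term("name", "canon")), id, 100); // same id: entry refreshed in place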
static void assertTopDocs(TopDocs actual, TopDocs expected) {
  assertThat(
      "actual.totalHits != expected.totalHits", actual.totalHits, equalTo(expected.totalHits));
  assertThat(
      "actual.getMaxScore() != expected.getMaxScore()",
      actual.getMaxScore(),
      equalTo(expected.getMaxScore()));
  assertThat(
      "actual.scoreDocs.length != expected.scoreDocs.length",
      actual.scoreDocs.length,
      equalTo(expected.scoreDocs.length));
  for (int i = 0; i < actual.scoreDocs.length; i++) {
    ScoreDoc actualHit = actual.scoreDocs[i];
    ScoreDoc expectedHit = expected.scoreDocs[i];
    assertThat("actualHit.doc != expectedHit.doc", actualHit.doc, equalTo(expectedHit.doc));
    assertThat(
        "actualHit.score != expectedHit.score", actualHit.score, equalTo(expectedHit.score));
  }
}
private void scaleScores(TopDocs topDocs, Map<Integer, Float> scoreMap) {
  float maxScore = topDocs.getMaxScore();
  float newMax = -Float.MAX_VALUE;

  for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    float score = scoreDoc.score;
    Float oldScore = scoreMap.get(scoreDoc.doc);

    if (oldScore == null || score == oldScore.floatValue()) {
      // Score unchanged by the second pass: normalize into [0, 1].
      scoreDoc.score = score / maxScore;
    } else {
      // Score changed by the second pass: normalize and shift into (1, 2].
      scoreDoc.score = (score / maxScore) + 1;
    }

    if (scoreDoc.score > newMax) {
      newMax = scoreDoc.score;
    }
  }

  assert (newMax <= 2);
  topDocs.setMaxScore(newMax);
}
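To make the scaling concrete: if getMaxScore() is 4.0, a document whose score the second pass changed and which now scores 3.0 lands at 3.0 / 4.0 + 1 = 1.75, while an untouched document scoring 2.0 lands at 0.5. Every re-scored document therefore sorts above every untouched one, and the new maximum can never exceed 2, which is exactly what the assertion checks.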
private void pruneSimilar(TopDocs docs) throws IOException {
  if (docs.scoreDocs.length == 0) {
    return;
  }

  int cutoff = docs.scoreDocs.length;
  double threshold = 0.005 * docs.scoreDocs[0].score; // 0.5% of the top score

  // Slide a 100-hit window down the (descending) score list; once the score drop
  // across the window falls below the threshold, the tail is "flat" and gets cut.
  for (int i = 0, j = 100; j < docs.scoreDocs.length; i++, j++) {
    float delta = docs.scoreDocs[i].score - docs.scoreDocs[j].score;
    if (delta < threshold) {
      cutoff = j;
      break;
    }
  }

  if (cutoff < docs.scoreDocs.length) {
    // LOG.info("pruned results from " + docs.scoreDocs.length + " to " + cutoff);
    docs.scoreDocs = ArrayUtils.subarray(docs.scoreDocs, 0, cutoff);
  }
}
private void executeRandomJoin(
    boolean multipleValuesPerDocument,
    int maxIndexIter,
    int maxSearchIter,
    int numberOfDocumentsToIndex)
    throws Exception {
  for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
    if (VERBOSE) {
      System.out.println("indexIter=" + indexIter);
    }
    Directory dir = newDirectory();
    RandomIndexWriter w =
        new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(
                    TEST_VERSION_CURRENT,
                    new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
                .setMergePolicy(newLogMergePolicy()));
    final boolean scoreDocsInOrder = TestJoinUtil.random().nextBoolean();
    IndexIterationContext context =
        createContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

    IndexReader topLevelReader = w.getReader();
    w.close();
    for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
      if (VERBOSE) {
        System.out.println("searchIter=" + searchIter);
      }
      IndexSearcher indexSearcher = newSearcher(topLevelReader);

      int r = random().nextInt(context.randomUniqueValues.length);
      boolean from = context.randomFrom[r];
      String randomValue = context.randomUniqueValues[r];
      FixedBitSet expectedResult =
          createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);

      final Query actualQuery = new TermQuery(new Term("value", randomValue));
      if (VERBOSE) {
        System.out.println("actualQuery=" + actualQuery);
      }
      final ScoreMode scoreMode =
          ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
      if (VERBOSE) {
        System.out.println("scoreMode=" + scoreMode);
      }

      final Query joinQuery;
      if (from) {
        joinQuery =
            JoinUtil.createJoinQuery(
                "from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
      } else {
        joinQuery =
            JoinUtil.createJoinQuery(
                "to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
      }
      if (VERBOSE) {
        System.out.println("joinQuery=" + joinQuery);
      }

      // Need to know all documents that have matches. TopDocs doesn't give me that, and then I'd
      // also be testing TopDocsCollector...
      final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
      final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, false);
      indexSearcher.search(
          joinQuery,
          new Collector() {
            int docBase;

            @Override
            public void collect(int doc) throws IOException {
              actualResult.set(doc + docBase);
              topScoreDocCollector.collect(doc);
            }

            @Override
            public void setNextReader(AtomicReaderContext context) {
              docBase = context.docBase;
              topScoreDocCollector.setNextReader(context);
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
              topScoreDocCollector.setScorer(scorer);
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return scoreDocsInOrder;
            }
          });

      // Asserting bit set...
      if (VERBOSE) {
        System.out.println("expected cardinality:" + expectedResult.cardinality());
        DocIdSetIterator iterator = expectedResult.iterator();
        for (int doc = iterator.nextDoc();
            doc != DocIdSetIterator.NO_MORE_DOCS;
            doc = iterator.nextDoc()) {
          System.out.println(
              String.format(
                  Locale.ROOT,
                  "Expected doc[%d] with id value %s",
                  doc,
                  indexSearcher.doc(doc).get("id")));
        }
        System.out.println("actual cardinality:" + actualResult.cardinality());
        iterator = actualResult.iterator();
        for (int doc = iterator.nextDoc();
            doc != DocIdSetIterator.NO_MORE_DOCS;
            doc = iterator.nextDoc()) {
          System.out.println(
              String.format(
                  Locale.ROOT,
                  "Actual doc[%d] with id value %s",
                  doc,
                  indexSearcher.doc(doc).get("id")));
        }
      }
      assertEquals(expectedResult, actualResult);

      // Asserting TopDocs...
      TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
      TopDocs actualTopDocs = topScoreDocCollector.topDocs();
      assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
      assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
      if (scoreMode == ScoreMode.None) {
        continue;
      }

      assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
      for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
        if (VERBOSE) {
          System.out.printf(
              Locale.ENGLISH,
              "Expected doc: %d | Actual doc: %d\n",
              expectedTopDocs.scoreDocs[i].doc,
              actualTopDocs.scoreDocs[i].doc);
          System.out.printf(
              Locale.ENGLISH,
              "Expected score: %f | Actual score: %f\n",
              expectedTopDocs.scoreDocs[i].score,
              actualTopDocs.scoreDocs[i].score);
        }
        assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
        assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
        Explanation explanation =
            indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
        assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
      }
    }
    topLevelReader.close();
    dir.close();
  }
}
// public void search(String keyword, int first, int max, Sort sort) throws Exception {
public void search(String keyword, String[] str) throws Exception {
  if (str.length > 0) {
    getDate gd = new getDate();
    String strtime = gd.GetNowDate();
    String searchStr = keyword;
    searchStr = searchStr.replace(" ", "");
    String filePath = "/home/hadoop/search/index";
    FileWriter fileWriter =
        new FileWriter("/home/hadoop/search/result/" + searchStr + "-" + strtime + ".txt");
    if (searchStr.equals("")) {
      fileWriter = new FileWriter("/home/hadoop/search/result/" + "Total-" + strtime + ".txt");
    }
    // FileWriter fileWriterTotal = new FileWriter(
    //     "/home/hadoop/search/result/Total" + searchStr + "-" + strtime + ".txt");
    // FileWriter fileWriterContent = new FileWriter(
    //     "/home/hadoop/search/result/Content" + searchStr + "-" + strtime + ".txt");
    Directory dir = FSDirectory.open(new File(filePath));
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new IKAnalyzer(true);
    TokenStream stream = analyzer.tokenStream("", new StringReader(searchStr));
    TopDocs topdocs = null;
    if (searchStr.equals("")) {
      Term term = new Term("Tag", searchStr);
      TermQuery query = new TermQuery(term);
      topdocs = searcher.search(query, 19999999);
    } else {
      BooleanQuery query = new BooleanQuery();
      // TokenStream stream = null;
      stream.reset();
      while (stream.incrementToken()) {
        for (int i = 0; i < str.length; i++) {
          System.out.println(str[i]);
          TermQuery termQuery =
              new TermQuery(
                  new Term(str[i], stream.getAttribute(CharTermAttribute.class).toString()));
          query.add(termQuery, BooleanClause.Occur.SHOULD);
        }
        // TermQuery termQuery = new TermQuery(new Term("Review_Content",
        //     stream.getAttribute(CharTermAttribute.class).toString()));
        // TermQuery termQueryhid = new TermQuery(new Term("Review_id",
        //     stream.getAttribute(CharTermAttribute.class).toString()));
        // System.out.println(termQuery);
        // query.add(termQuery, BooleanClause.Occur.SHOULD);
        // query.add(termQueryhid, BooleanClause.Occur.SHOULD);
      }
      stream.close();
      // TopFieldCollector c =
      //     TopFieldCollector.create(sort, first + max, false, false, false, false);
      // searcher.search(query, c);
      // ScoreDoc[] scoreDocs = c.topDocs(first, max).scoreDocs;
      // System.out.println(scoreDocs.length);
      topdocs = searcher.search(query, 19999999);
    }
    ScoreDoc[] scoreDocs = topdocs.scoreDocs;
    if (scoreDocs == null || scoreDocs.length < 1) return;
    List<String> sentlist = new ArrayList<String>();
    // System.out.println("total results---" + c.getTotalHits() + " max score--");
    System.out.println(
        "total results---" + topdocs.totalHits + " max score--" + topdocs.getMaxScore());
    for (int i = 0; i < scoreDocs.length; i++) {
      int doc = scoreDocs[i].doc;
      String sent = null;
      Document document = searcher.doc(doc);
      // System.out.println("Hotel_id====" + document.get("Review_id"));
      // System.out.println("Review_Content====" + document.get("Review_Content"));
      // System.out.println("id--" + scoreDocs[i].doc + "---scores--" + scoreDocs[i].score
      //     + "---index--" + scoreDocs[i].shardIndex);
      stream = analyzer.tokenStream("", new StringReader(document.get("Review_Content")));
      stream.reset();
      String outstr = "";
      while (stream.incrementToken()) {
        outstr += stream.getAttribute(CharTermAttribute.class).toString() + " ";
      }
      stream.close();
      String s = "";
      // fileWriter.write("Review_id:" + document.get("Review_id") + "|^|" + "Hotel_id:"
      //     + document.get("Hotel_id") + "|^|" + "Review_Content:" + out + "\n");
      sent = document.get("Review_id") + "\t" + document.get("Hotel_id") + "\t" + outstr + '\n';
      fileWriter.write(
          document.get("Review_id") + "\t" + document.get("Hotel_id") + "\t" + outstr + "\t"
              + document.get("id") + "\n");
    }
    fileWriter.flush();
    fileWriter.close();
    // ridlist.add(document.get("Review_id"));
    /* Term term = new Term("Review_id", document.get("Review_id"));
    TermQuery termquery = new TermQuery(term);
    TopDocs senttopdocs = searcher.search(termquery, 1000);
    ScoreDoc[] sentscoreDocs = senttopdocs.scoreDocs;
    for (int j = 0; j < sentscoreDocs.length; j++) {
      int sentdoc = sentscoreDocs[j].doc;
      Document sentdocument = searcher.doc(sentdoc);
      if (j == sentscoreDocs.length - 1) {
        sent += sentdocument.get("Review_Content").trim() + "。\n";
        s += sentdocument.get("Review_Content").trim() + "。\n";
      } else {
        sent += sentdocument.get("Review_Content").trim() + ",";
        s += sentdocument.get("Review_Content").trim() + ",";
      }
    }
    fileWriterTotal.write(sent);
    // fileWriterTotal.flush();
    sentlist.add(sent);
    stream = analyzer.tokenStream("", new StringReader(s));
    stream.reset();
    String outs = "";
    while (stream.incrementToken()) {
      outs += stream.getAttribute(CharTermAttribute.class).toString() + " ";
    }
    stream.close();
    fileWriterContent.write(outs + '\n');
    // fileWriterContent.flush();
    }
    fileWriter.flush();
    fileWriterTotal.flush();
    fileWriterContent.flush();
    fileWriterTotal.close();
    fileWriterContent.close();
    fileWriter.close();
    reader.close(); */
    reader.close();
    // return new SearchResultBean(c.getTotalHits(), sentlist);
  }
}
/**
 * Accumulates groups for the BlockJoinQuery specified by its slot.
 *
 * @param slot Search query's slot
 * @param offset Parent docs offset
 * @param maxDocsPerGroup Upper bound on the number of documents per group
 * @param withinGroupOffset Offset within each group of child docs
 * @param withinGroupSort Sort criteria within groups
 * @param fillSortFields Specifies whether to add sort fields or not
 * @return TopGroups for the query specified by slot
 * @throws IOException if there is a low-level I/O error
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private TopGroups<Integer> accumulateGroups(
    int slot,
    int offset,
    int maxDocsPerGroup,
    int withinGroupOffset,
    Sort withinGroupSort,
    boolean fillSortFields)
    throws IOException {
  final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
  final FakeScorer fakeScorer = new FakeScorer();
  int totalGroupedHitCount = 0;
  // System.out.println("slot=" + slot);

  for (int groupIDX = offset; groupIDX < sortedGroups.length; groupIDX++) {
    final OneGroup og = sortedGroups[groupIDX];
    final int numChildDocs;
    if (slot == -1 || slot >= og.counts.length) {
      numChildDocs = 0;
    } else {
      numChildDocs = og.counts[slot];
    }

    // Number of documents in group should be bounded to prevent redundant memory allocation
    final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup));
    // System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs
    //     + " maxDocsPG=" + maxDocsPerGroup);

    // At this point we hold all docs w/in each group, unsorted; we now sort them:
    final TopDocsCollector<?> collector;
    if (withinGroupSort == null) {
      // System.out.println("sort by score");
      // Sort by score
      if (!trackScores) {
        throw new IllegalArgumentException(
            "cannot sort by relevance within group: trackScores=false");
      }
      collector = TopScoreDocCollector.create(numDocsInGroup, true);
    } else {
      // Sort by fields
      collector =
          TopFieldCollector.create(
              withinGroupSort, numDocsInGroup, fillSortFields, trackScores, trackMaxScore, true);
    }

    collector.setScorer(fakeScorer);
    collector.setNextReader(og.readerContext);
    for (int docIDX = 0; docIDX < numChildDocs; docIDX++) {
      // System.out.println("docIDX=" + docIDX + " vs " + og.docs[slot].length);
      final int doc = og.docs[slot][docIDX];
      fakeScorer.doc = doc;
      if (trackScores) {
        fakeScorer.score = og.scores[slot][docIDX];
      }
      collector.collect(doc);
    }
    totalGroupedHitCount += numChildDocs;

    final Object[] groupSortValues;

    if (fillSortFields) {
      groupSortValues = new Object[comparators.length];
      for (int sortFieldIDX = 0; sortFieldIDX < comparators.length; sortFieldIDX++) {
        groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.slot);
      }
    } else {
      groupSortValues = null;
    }

    final TopDocs topDocs = collector.topDocs(withinGroupOffset, numDocsInGroup);

    groups[groupIDX - offset] =
        new GroupDocs<>(
            og.score,
            topDocs.getMaxScore(),
            numChildDocs,
            topDocs.scoreDocs,
            og.doc,
            groupSortValues);
  }

  return new TopGroups<>(
      new TopGroups<>(
          sort.getSort(),
          withinGroupSort == null ? null : withinGroupSort.getSort(),
          0,
          totalGroupedHitCount,
          groups,
          maxScore),
      totalHitCount);
}
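This accumulator sits inside a block-join collector, so it is normally reached via a collector's getTopGroups() call rather than invoked directly. A hedged sketch of that call path in the Lucene 4.x join module; the field names ("type", "skill"), values, and counts are illustrative assumptions, not from the source:

// Parents are marked with type=parent; children carry the searchable fields.
Filter parentsFilter =
    new FixedBitSetCachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("type", "parent"))));
Query childQuery = new TermQuery(new Term("skill", "java"));
ToParentBlockJoinQuery joinQuery =
    new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

// Collect the top 10 parents by relevance, tracking child scores.
ToParentBlockJoinCollector collector =
    new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
searcher.search(joinQuery, collector);

// getTopGroups() funnels into accumulateGroups() above: 5 child docs per
// parent, sorted by score within each group (withinGroupSort = null).
TopGroups<Integer> groups = collector.getTopGroups(joinQuery, null, 0, 5, 0, true);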
public TopDocs topDocs(int start, int howMany) {
  try {
    TopDocs mainDocs = mainCollector.topDocs(0, Math.max(reRankDocs, length));

    if (mainDocs.totalHits == 0 || mainDocs.scoreDocs.length == 0) {
      return mainDocs;
    }

    if (boostedPriority != null) {
      SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
      Map requestContext = null;
      if (info != null) {
        requestContext = info.getReq().getContext();
      }

      IntIntOpenHashMap boostedDocs =
          QueryElevationComponent.getBoostDocs(
              (SolrIndexSearcher) searcher, boostedPriority, requestContext);

      ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;
      ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];
      System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length);

      mainDocs.scoreDocs = reRankScoreDocs;

      Map<Integer, Float> scoreMap = getScoreMap(mainDocs.scoreDocs, mainDocs.scoreDocs.length);

      TopDocs rescoredDocs =
          new QueryRescorer(reRankQuery) {
            @Override
            protected float combine(
                float firstPassScore, boolean secondPassMatches, float secondPassScore) {
              float score = firstPassScore;
              if (secondPassMatches) {
                score += reRankWeight * secondPassScore;
              }
              return score;
            }
          }.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);

      Arrays.sort(
          rescoredDocs.scoreDocs,
          new BoostedComp(boostedDocs, mainDocs.scoreDocs, rescoredDocs.getMaxScore()));

      // Lower howMany if we've collected fewer documents.
      howMany = Math.min(howMany, mainScoreDocs.length);

      if (howMany == rescoredDocs.scoreDocs.length) {
        if (scale) {
          scaleScores(rescoredDocs, scoreMap);
        }
        return rescoredDocs; // Just return the rescoredDocs
      } else if (howMany > rescoredDocs.scoreDocs.length) {
        // We need to return more than we've re-ranked, so create the combined page.
        ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
        // lay down the initial docs
        System.arraycopy(mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length);
        // overlay the re-ranked docs
        System.arraycopy(
            rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length);
        rescoredDocs.scoreDocs = scoreDocs;
        if (scale) {
          scaleScores(rescoredDocs, scoreMap);
        }
        return rescoredDocs;
      } else {
        // We've rescored more than we need to return.
        ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
        System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany);
        rescoredDocs.scoreDocs = scoreDocs;
        if (scale) {
          scaleScores(rescoredDocs, scoreMap);
        }
        return rescoredDocs;
      }
    } else {
      ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;

      /*
       * Create the array for the reRankScoreDocs.
       */
      ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];

      /*
       * Copy the initial results into the reRankScoreDocs array.
       */
      System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length);

      mainDocs.scoreDocs = reRankScoreDocs;

      Map<Integer, Float> scoreMap = getScoreMap(mainDocs.scoreDocs, mainDocs.scoreDocs.length);

      TopDocs rescoredDocs =
          new QueryRescorer(reRankQuery) {
            @Override
            protected float combine(
                float firstPassScore, boolean secondPassMatches, float secondPassScore) {
              float score = firstPassScore;
              if (secondPassMatches) {
                score += reRankWeight * secondPassScore;
              }
              return score;
            }
          }.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);

      // Lower howMany to return if we've collected fewer documents.
      howMany = Math.min(howMany, mainScoreDocs.length);

      if (howMany == rescoredDocs.scoreDocs.length) {
        if (scale) {
          scaleScores(rescoredDocs, scoreMap);
        }
        return rescoredDocs; // Just return the rescoredDocs
      } else if (howMany > rescoredDocs.scoreDocs.length) {
        // We need to return more than we've re-ranked, so create the combined page.
        ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
        // lay down the initial docs
        System.arraycopy(mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length);
        // overlay the re-ranked docs
        System.arraycopy(
            rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length);
        rescoredDocs.scoreDocs = scoreDocs;
        if (scale) {
          assert (scoreMap != null);
          scaleScores(rescoredDocs, scoreMap);
        }
        return rescoredDocs;
      } else {
        // We've rescored more than we need to return.
        ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
        System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany);
        rescoredDocs.scoreDocs = scoreDocs;
        if (scale) {
          scaleScores(rescoredDocs, scoreMap);
        }
        return rescoredDocs;
      }
    }
  } catch (Exception e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
  }
}
public static void writeTopDocs(StreamOutput out, TopDocs topDocs, int from) throws IOException {
  if (topDocs.scoreDocs.length - from < 0) {
    out.writeBoolean(false);
    return;
  }
  out.writeBoolean(true);
  if (topDocs instanceof TopFieldDocs) {
    out.writeBoolean(true);
    TopFieldDocs topFieldDocs = (TopFieldDocs) topDocs;

    out.writeVInt(topDocs.totalHits);
    out.writeFloat(topDocs.getMaxScore());

    out.writeVInt(topFieldDocs.fields.length);
    for (SortField sortField : topFieldDocs.fields) {
      if (sortField.getField() == null) {
        out.writeBoolean(false);
      } else {
        out.writeBoolean(true);
        out.writeString(sortField.getField());
      }

      if (sortField.getComparatorSource() != null) {
        writeSortType(
            out,
            ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource())
                .reducedType());
      } else {
        writeSortType(out, sortField.getType());
      }

      out.writeBoolean(sortField.getReverse());
    }

    out.writeVInt(topDocs.scoreDocs.length - from);
    int index = 0;
    for (ScoreDoc doc : topFieldDocs.scoreDocs) {
      if (index++ < from) {
        continue;
      }

      FieldDoc fieldDoc = (FieldDoc) doc;
      out.writeVInt(fieldDoc.fields.length);
      for (Object field : fieldDoc.fields) {
        if (field == null) {
          out.writeByte((byte) 0);
        } else {
          Class type = field.getClass();
          if (type == String.class) {
            out.writeByte((byte) 1);
            out.writeString((String) field);
          } else if (type == Integer.class) {
            out.writeByte((byte) 2);
            out.writeInt((Integer) field);
          } else if (type == Long.class) {
            out.writeByte((byte) 3);
            out.writeLong((Long) field);
          } else if (type == Float.class) {
            out.writeByte((byte) 4);
            out.writeFloat((Float) field);
          } else if (type == Double.class) {
            out.writeByte((byte) 5);
            out.writeDouble((Double) field);
          } else if (type == Byte.class) {
            out.writeByte((byte) 6);
            out.writeByte((Byte) field);
          } else if (type == Short.class) {
            out.writeByte((byte) 7);
            out.writeShort((Short) field);
          } else if (type == Boolean.class) {
            out.writeByte((byte) 8);
            out.writeBoolean((Boolean) field);
          } else if (type == BytesRef.class) {
            out.writeByte((byte) 9);
            out.writeBytesRef((BytesRef) field);
          } else {
            throw new IOException("Can't handle sort field value of type [" + type + "]");
          }
        }
      }

      out.writeVInt(doc.doc);
      out.writeFloat(doc.score);
    }
  } else {
    out.writeBoolean(false);
    out.writeVInt(topDocs.totalHits);
    out.writeFloat(topDocs.getMaxScore());

    out.writeVInt(topDocs.scoreDocs.length - from);
    int index = 0;
    for (ScoreDoc doc : topDocs.scoreDocs) {
      if (index++ < from) {
        continue;
      }
      out.writeVInt(doc.doc);
      out.writeFloat(doc.score);
    }
  }
}
/**
 * Auxiliary method used by the {@link #merge} impls. A sort value of null is used to indicate
 * that docs should be sorted by score.
 */
private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits)
    throws IOException {
  final PriorityQueue<ShardRef> queue;
  if (sort == null) {
    queue = new ScoreMergeSortQueue(shardHits);
  } else {
    queue = new MergeSortQueue(sort, shardHits);
  }

  int totalHitCount = 0;
  int availHitCount = 0;
  float maxScore = Float.MIN_VALUE;
  for (int shardIDX = 0; shardIDX < shardHits.length; shardIDX++) {
    final TopDocs shard = shardHits[shardIDX];
    // totalHits can be non-zero even if no hits were
    // collected, when searchAfter was used:
    totalHitCount += shard.totalHits;
    if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
      availHitCount += shard.scoreDocs.length;
      queue.add(new ShardRef(shardIDX));
      maxScore = Math.max(maxScore, shard.getMaxScore());
      // System.out.println("  maxScore now " + maxScore + " vs " + shard.getMaxScore());
    }
  }

  if (availHitCount == 0) {
    maxScore = Float.NaN;
  }

  final ScoreDoc[] hits;
  if (availHitCount <= start) {
    hits = new ScoreDoc[0];
  } else {
    hits = new ScoreDoc[Math.min(size, availHitCount - start)];
    int requestedResultWindow = start + size;
    int numIterOnHits = Math.min(availHitCount, requestedResultWindow);
    int hitUpto = 0;
    while (hitUpto < numIterOnHits) {
      assert queue.size() > 0;
      ShardRef ref = queue.pop();
      final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
      hit.shardIndex = ref.shardIndex;
      if (hitUpto >= start) {
        hits[hitUpto - start] = hit;
      }
      // System.out.println("  hitUpto=" + hitUpto);
      // System.out.println("    doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
      hitUpto++;
      if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
        // Not done with these TopDocs yet:
        queue.add(ref);
      }
    }
  }

  if (sort == null) {
    return new TopDocs(totalHitCount, hits, maxScore);
  } else {
    return new TopFieldDocs(totalHitCount, hits, sort.getSort(), maxScore);
  }
}
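A minimal sketch of the public merge entry point that delegates to mergeAux; the two per-shard searches and the searcher names are illustrative assumptions, but the four-argument TopDocs.merge call itself matches the usage in the next snippet:

// One TopDocs per shard, produced independently.
TopDocs shard0Hits = searcher0.search(query, 10);
TopDocs shard1Hits = searcher1.search(query, 10);
TopDocs[] shardHits = new TopDocs[] {shard0Hits, shard1Hits};

// Merge into one page: null sort means "by score", start 0, top 10 overall.
// Each returned ScoreDoc gets shardIndex set so callers can route doc fetches.
TopDocs merged = TopDocs.merge(null, 0, 10, shardHits);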
/**
 * @param scrollSort Whether to ignore the from and sort all hits in each shard result. Only used
 *     for scroll search
 * @param resultsArr Shard result holder
 */
public ScoreDoc[] sortDocs(
    boolean scrollSort, AtomicArray<? extends QuerySearchResultProvider> resultsArr)
    throws IOException {
  List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> results =
      resultsArr.asList();
  if (results.isEmpty()) {
    return EMPTY_DOCS;
  }

  if (optimizeSingleShard) {
    boolean canOptimize = false;
    QuerySearchResult result = null;
    int shardIndex = -1;
    if (results.size() == 1) {
      canOptimize = true;
      result = results.get(0).value.queryResult();
      shardIndex = results.get(0).index;
    } else {
      // let's see if we only got hits from a single shard; if so, we can optimize...
      for (AtomicArray.Entry<? extends QuerySearchResultProvider> entry : results) {
        if (entry.value.queryResult().topDocs().scoreDocs.length > 0) {
          if (result != null) { // we already have one, can't really optimize
            canOptimize = false;
            break;
          }
          canOptimize = true;
          result = entry.value.queryResult();
          shardIndex = entry.index;
        }
      }
    }
    if (canOptimize) {
      int offset = result.from();
      if (scrollSort) {
        offset = 0;
      }
      ScoreDoc[] scoreDocs = result.topDocs().scoreDocs;
      if (scoreDocs.length == 0 || scoreDocs.length < offset) {
        return EMPTY_DOCS;
      }

      int resultDocsSize = result.size();
      if ((scoreDocs.length - offset) < resultDocsSize) {
        resultDocsSize = scoreDocs.length - offset;
      }
      ScoreDoc[] docs = new ScoreDoc[resultDocsSize];
      for (int i = 0; i < resultDocsSize; i++) {
        ScoreDoc scoreDoc = scoreDocs[offset + i];
        scoreDoc.shardIndex = shardIndex;
        docs[i] = scoreDoc;
      }
      return docs;
    }
  }

  @SuppressWarnings("unchecked")
  AtomicArray.Entry<? extends QuerySearchResultProvider>[] sortedResults =
      results.toArray(new AtomicArray.Entry[results.size()]);
  Arrays.sort(sortedResults, QUERY_RESULT_ORDERING);
  QuerySearchResultProvider firstResult = sortedResults[0].value;

  final Sort sort;
  if (firstResult.queryResult().topDocs() instanceof TopFieldDocs) {
    TopFieldDocs firstTopDocs = (TopFieldDocs) firstResult.queryResult().topDocs();
    sort = new Sort(firstTopDocs.fields);
  } else {
    sort = null;
  }

  int topN = firstResult.queryResult().size();
  // Need to use the length of the resultsArr array, since the slots will be based on the
  // position in the resultsArr array
  TopDocs[] shardTopDocs = new TopDocs[resultsArr.length()];
  if (firstResult.includeFetch()) {
    // if we did both query and fetch on the same go, we have fetched all the docs from each
    // shard already, use them...
    // this is also important since we shortcut and fetch only docs from "from" and up to "size"
    topN *= sortedResults.length;
  }
  for (AtomicArray.Entry<? extends QuerySearchResultProvider> sortedResult : sortedResults) {
    TopDocs topDocs = sortedResult.value.queryResult().topDocs();
    // the 'index' field is the position in the resultsArr atomic array
    shardTopDocs[sortedResult.index] = topDocs;
  }
  int from = firstResult.queryResult().from();
  if (scrollSort) {
    from = 0;
  }
  // TopDocs#merge can't deal with null shard TopDocs
  for (int i = 0; i < shardTopDocs.length; i++) {
    if (shardTopDocs[i] == null) {
      shardTopDocs[i] = Lucene.EMPTY_TOP_DOCS;
    }
  }
  TopDocs mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
  return mergedTopDocs.scoreDocs;
}