public ReRankCollector( int reRankDocs, int length, Query reRankQuery, double reRankWeight, SolrIndexSearcher.QueryCommand cmd, IndexSearcher searcher, Map<BytesRef, Integer> boostedPriority, boolean scale) throws IOException { super(null); this.reRankQuery = reRankQuery; this.reRankDocs = reRankDocs; this.length = length; this.boostedPriority = boostedPriority; this.scale = scale; Sort sort = cmd.getSort(); if (sort == null) { this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), true); } else { sort = sort.rewrite(searcher); this.mainCollector = TopFieldCollector.create( sort, Math.max(this.reRankDocs, length), false, true, true, true); } this.searcher = searcher; this.reRankWeight = reRankWeight; }
public Query parse() throws SyntaxError { String reRankQueryString = localParams.get("reRankQuery"); boolean scale = localParams.getBool("scale", false); QParser reRankParser = QParser.getParser(reRankQueryString, null, req); Query reRankQuery = reRankParser.parse(); int reRankDocs = localParams.getInt("reRankDocs", 200); reRankDocs = Math.max(1, reRankDocs); // double reRankWeight = localParams.getDouble("reRankWeight", 2.0d); int start = params.getInt(CommonParams.START, 0); int rows = params.getInt(CommonParams.ROWS, 10); int length = start + rows; return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight, length, scale); }
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) { SortSpec ss = rb.getSortSpec(); Sort sort = ss.getSort(); SortField[] sortFields = null; if (sort != null) sortFields = sort.getSort(); else { sortFields = new SortField[] {SortField.FIELD_SCORE}; } SchemaField uniqueKeyField = rb.req.getSchema().getUniqueKeyField(); // id to shard mapping, to eliminate any accidental dups HashMap<Object, String> uniqueDoc = new HashMap<Object, String>(); // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) ShardFieldSortedHitQueue queue; queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount()); long numFound = 0; Float maxScore = null; for (ShardResponse srsp : sreq.responses) { SolrDocumentList docs = (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response"); // calculate global maxScore and numDocsFound if (docs.getMaxScore() != null) { maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore()); } numFound += docs.getNumFound(); NamedList sortFieldValues = (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values")); // go through every doc in this response, construct a ShardDoc, and // put it in the priority queue so it can be ordered. for (int i = 0; i < docs.size(); i++) { SolrDocument doc = docs.get(i); Object id = doc.getFieldValue(uniqueKeyField.getName()); String prevShard = uniqueDoc.put(id, srsp.getShard()); if (prevShard != null) { // duplicate detected numFound--; // For now, just always use the first encountered since we can't currently // remove the previous one added to the priority queue. If we switched // to the Java5 PriorityQueue, this would be easier. continue; // make which duplicate is used deterministic based on shard // if (prevShard.compareTo(srsp.shard) >= 0) { // TODO: remove previous from priority queue // continue; // } } ShardDoc shardDoc = new ShardDoc(); shardDoc.id = id; shardDoc.shard = srsp.getShard(); shardDoc.orderInShard = i; Object scoreObj = doc.getFieldValue("score"); if (scoreObj != null) { if (scoreObj instanceof String) { shardDoc.score = Float.parseFloat((String) scoreObj); } else { shardDoc.score = (Float) scoreObj; } } shardDoc.sortFieldValues = sortFieldValues; queue.insertWithOverflow(shardDoc); } // end for-each-doc-in-response } // end for-each-response // The queue now has 0 -> queuesize docs, where queuesize <= start + rows // So we want to pop the last documents off the queue to get // the docs offset -> queuesize int resultSize = queue.size() - ss.getOffset(); resultSize = Math.max(0, resultSize); // there may not be any docs in range Map<Object, ShardDoc> resultIds = new HashMap<Object, ShardDoc>(); for (int i = resultSize - 1; i >= 0; i--) { ShardDoc shardDoc = (ShardDoc) queue.pop(); shardDoc.positionInResponse = i; // Need the toString() for correlation with other lists that must // be strings (like keys in highlighting, explain, etc) resultIds.put(shardDoc.id.toString(), shardDoc); } SolrDocumentList responseDocs = new SolrDocumentList(); if (maxScore != null) responseDocs.setMaxScore(maxScore); responseDocs.setNumFound(numFound); responseDocs.setStart(ss.getOffset()); // size appropriately for (int i = 0; i < resultSize; i++) responseDocs.add(null); // save these results in a private area so we can access them // again when retrieving stored fields. // TODO: use ResponseBuilder (w/ comments) or the request context? rb.resultIds = resultIds; rb._responseDocs = responseDocs; }
public TopDocs topDocs(int start, int howMany) { try { TopDocs mainDocs = mainCollector.topDocs(0, Math.max(reRankDocs, length)); if (mainDocs.totalHits == 0 || mainDocs.scoreDocs.length == 0) { return mainDocs; } if (boostedPriority != null) { SolrRequestInfo info = SolrRequestInfo.getRequestInfo(); Map requestContext = null; if (info != null) { requestContext = info.getReq().getContext(); } IntIntOpenHashMap boostedDocs = QueryElevationComponent.getBoostDocs( (SolrIndexSearcher) searcher, boostedPriority, requestContext); ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs; ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)]; System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length); mainDocs.scoreDocs = reRankScoreDocs; Map<Integer, Float> scoreMap = getScoreMap(mainDocs.scoreDocs, mainDocs.scoreDocs.length); TopDocs rescoredDocs = new QueryRescorer(reRankQuery) { @Override protected float combine( float firstPassScore, boolean secondPassMatches, float secondPassScore) { float score = firstPassScore; if (secondPassMatches) { score += reRankWeight * secondPassScore; } return score; } }.rescore(searcher, mainDocs, mainDocs.scoreDocs.length); Arrays.sort( rescoredDocs.scoreDocs, new BoostedComp(boostedDocs, mainDocs.scoreDocs, rescoredDocs.getMaxScore())); // Lower howMany if we've collected fewer documents. howMany = Math.min(howMany, mainScoreDocs.length); if (howMany == rescoredDocs.scoreDocs.length) { if (scale) { scaleScores(rescoredDocs, scoreMap); } return rescoredDocs; // Just return the rescoredDocs } else if (howMany > rescoredDocs.scoreDocs.length) { // We need to return more then we've reRanked, so create the combined page. ScoreDoc[] scoreDocs = new ScoreDoc[howMany]; System.arraycopy( mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length); // lay down the initial docs System.arraycopy( rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length); // overlay the re-ranked docs. rescoredDocs.scoreDocs = scoreDocs; if (scale) { scaleScores(rescoredDocs, scoreMap); } return rescoredDocs; } else { // We've rescored more then we need to return. ScoreDoc[] scoreDocs = new ScoreDoc[howMany]; System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany); rescoredDocs.scoreDocs = scoreDocs; if (scale) { scaleScores(rescoredDocs, scoreMap); } return rescoredDocs; } } else { ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs; /* * Create the array for the reRankScoreDocs. */ ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)]; /* * Copy the initial results into the reRankScoreDocs array. */ System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length); mainDocs.scoreDocs = reRankScoreDocs; Map<Integer, Float> scoreMap = getScoreMap(mainDocs.scoreDocs, mainDocs.scoreDocs.length); TopDocs rescoredDocs = new QueryRescorer(reRankQuery) { @Override protected float combine( float firstPassScore, boolean secondPassMatches, float secondPassScore) { float score = firstPassScore; if (secondPassMatches) { score += reRankWeight * secondPassScore; } return score; } }.rescore(searcher, mainDocs, mainDocs.scoreDocs.length); // Lower howMany to return if we've collected fewer documents. howMany = Math.min(howMany, mainScoreDocs.length); if (howMany == rescoredDocs.scoreDocs.length) { if (scale) { scaleScores(rescoredDocs, scoreMap); } return rescoredDocs; // Just return the rescoredDocs } else if (howMany > rescoredDocs.scoreDocs.length) { // We need to return more then we've reRanked, so create the combined page. ScoreDoc[] scoreDocs = new ScoreDoc[howMany]; // lay down the initial docs System.arraycopy(mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length); // overlay the rescoreds docs System.arraycopy( rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length); rescoredDocs.scoreDocs = scoreDocs; if (scale) { assert (scoreMap != null); scaleScores(rescoredDocs, scoreMap); } return rescoredDocs; } else { // We've rescored more then we need to return. ScoreDoc[] scoreDocs = new ScoreDoc[howMany]; System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany); rescoredDocs.scoreDocs = scoreDocs; if (scale) { scaleScores(rescoredDocs, scoreMap); } return rescoredDocs; } } } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } }