private void createMainQuery(ResponseBuilder rb) {
  ShardRequest sreq = new ShardRequest();
  sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS;
  sreq.params = new ModifiableSolrParams(rb.req.getParams());
  // TODO: base on current params or original params?

  // don't pass through any shards param
  sreq.params.remove(ShardParams.SHARDS);

  // set the start (offset) to 0 for each shard request so we can properly merge
  // results from the start.
  if (rb.shards_start > -1) {
    // if the client set shards.start, use it explicitly
    sreq.params.set(CommonParams.START, rb.shards_start);
  } else {
    sreq.params.set(CommonParams.START, "0");
  }

  // TODO: should we even use the SortSpec? That's obtained from the QParser, and
  // perhaps we shouldn't attempt to parse the query at this level?
  // Alternate idea: instead of specifying all these things at the upper level,
  // we could just specify that this is a shard request.
  if (rb.shards_rows > -1) {
    // if the client set shards.rows, use it explicitly
    sreq.params.set(CommonParams.ROWS, rb.shards_rows);
  } else {
    sreq.params.set(
        CommonParams.ROWS, rb.getSortSpec().getOffset() + rb.getSortSpec().getCount());
  }

  // in this first phase, request only the unique key field
  // and any fields needed for merging.
  sreq.params.set(ResponseBuilder.FIELD_SORT_VALUES, "true");
  if ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
      || rb.getSortSpec().includesScore()) {
    sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
  } else {
    sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName());
  }

  rb.addRequest(this, sreq);
}
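// ---------------------------------------------------------------------------
// Illustrative sketch (not Solr code): why the per-shard request above asks
// every shard for start=0 and rows=offset+count. In the worst case, all of the
// globally top (offset+count) documents live on a single shard, so each shard
// must return that many candidates or the merged page could be wrong. The
// class name and data below are hypothetical, chosen only to show the
// arithmetic with plain sorted score lists.
// ---------------------------------------------------------------------------
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class ShardWindowSketch {
  public static void main(String[] args) {
    int offset = 2, count = 3; // the client wants the global window [2, 5)
    List<List<Integer>> shardScores = List.of(
        List.of(100, 99, 98, 97, 96), // this shard happens to hold all top docs
        List.of(10, 9, 8));

    // Each shard contributes its own top (offset + count) scores, from rank 0.
    List<Integer> merged = new ArrayList<>();
    for (List<Integer> shard : shardScores) {
      merged.addAll(shard.subList(0, Math.min(offset + count, shard.size())));
    }
    merged.sort(Collections.reverseOrder());

    // The page is correct only because every shard sent offset+count docs.
    System.out.println(merged.subList(offset, Math.min(offset + count, merged.size())));
    // prints [98, 97, 96]
  }
}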
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
  SortSpec ss = rb.getSortSpec();
  Sort sort = ss.getSort();

  SortField[] sortFields = null;
  if (sort != null) {
    sortFields = sort.getSort();
  } else {
    sortFields = new SortField[] {SortField.FIELD_SCORE};
  }

  SchemaField uniqueKeyField = rb.req.getSchema().getUniqueKeyField();

  // id to shard mapping, to eliminate any accidental dups
  HashMap<Object, String> uniqueDoc = new HashMap<Object, String>();

  // Merge the docs via a priority queue so we don't have to sort *all* of the
  // documents... we only need to order the top (rows+start)
  ShardFieldSortedHitQueue queue;
  queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount());

  long numFound = 0;
  Float maxScore = null;
  for (ShardResponse srsp : sreq.responses) {
    SolrDocumentList docs =
        (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");

    // calculate global maxScore and numFound
    if (docs.getMaxScore() != null) {
      maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
    }
    numFound += docs.getNumFound();

    NamedList sortFieldValues =
        (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values"));

    // go through every doc in this response, construct a ShardDoc, and
    // put it in the priority queue so it can be ordered.
    for (int i = 0; i < docs.size(); i++) {
      SolrDocument doc = docs.get(i);
      Object id = doc.getFieldValue(uniqueKeyField.getName());

      String prevShard = uniqueDoc.put(id, srsp.getShard());
      if (prevShard != null) {
        // duplicate detected
        numFound--;

        // For now, just always use the first encountered since we can't currently
        // remove the previous one added to the priority queue. If we switched
        // to the Java5 PriorityQueue, this would be easier.
        continue;
        // make which duplicate is used deterministic based on shard
        // if (prevShard.compareTo(srsp.shard) >= 0) {
        //   TODO: remove previous from priority queue
        //   continue;
        // }
      }

      ShardDoc shardDoc = new ShardDoc();
      shardDoc.id = id;
      shardDoc.shard = srsp.getShard();
      shardDoc.orderInShard = i;
      Object scoreObj = doc.getFieldValue("score");
      if (scoreObj != null) {
        if (scoreObj instanceof String) {
          shardDoc.score = Float.parseFloat((String) scoreObj);
        } else {
          shardDoc.score = (Float) scoreObj;
        }
      }
      shardDoc.sortFieldValues = sortFieldValues;

      queue.insertWithOverflow(shardDoc);
    } // end for-each-doc-in-response
  } // end for-each-response

  // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
  // So we want to pop the last documents off the queue to get
  // the docs offset -> queuesize
  int resultSize = queue.size() - ss.getOffset();
  resultSize = Math.max(0, resultSize); // there may not be any docs in range

  Map<Object, ShardDoc> resultIds = new HashMap<Object, ShardDoc>();
  for (int i = resultSize - 1; i >= 0; i--) {
    ShardDoc shardDoc = (ShardDoc) queue.pop();
    shardDoc.positionInResponse = i;
    // Need the toString() for correlation with other lists that must
    // be strings (like keys in highlighting, explain, etc)
    resultIds.put(shardDoc.id.toString(), shardDoc);
  }

  SolrDocumentList responseDocs = new SolrDocumentList();
  if (maxScore != null) responseDocs.setMaxScore(maxScore);
  responseDocs.setNumFound(numFound);
  responseDocs.setStart(ss.getOffset());
  // size appropriately
  for (int i = 0; i < resultSize; i++) responseDocs.add(null);

  // save these results in a private area so we can access them
  // again when retrieving stored fields.
  // TODO: use ResponseBuilder (w/ comments) or the request context?
  rb.resultIds = resultIds;
  rb._responseDocs = responseDocs;
}
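// ---------------------------------------------------------------------------
// Illustrative sketch (not Solr code): the bounded-heap merge used by
// mergeIds above, rebuilt with java.util.PriorityQueue. A min-heap of
// capacity (offset + count) keeps only the candidates that could appear on
// the requested page, and an id-to-shard map drops accidental duplicates,
// with the first occurrence winning, as in the real code. The MiniDoc record
// and the sample data are hypothetical.
// ---------------------------------------------------------------------------
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;

public class MergeIdsSketch {
  record MiniDoc(String id, String shard, float score) {}

  public static void main(String[] args) {
    int offset = 0, count = 2;
    MiniDoc[] responses = {
      new MiniDoc("a", "shard1", 3.0f),
      new MiniDoc("b", "shard1", 1.0f),
      new MiniDoc("a", "shard2", 2.5f), // duplicate id, will be skipped
      new MiniDoc("c", "shard2", 2.0f),
    };

    int capacity = offset + count;
    // min-heap: the weakest kept doc sits on top, ready to be evicted
    PriorityQueue<MiniDoc> queue =
        new PriorityQueue<>(Comparator.comparingDouble(MiniDoc::score));
    Map<String, String> uniqueDoc = new HashMap<>(); // id -> first shard seen

    for (MiniDoc doc : responses) {
      if (uniqueDoc.putIfAbsent(doc.id(), doc.shard()) != null) continue; // dup
      if (queue.size() < capacity) {
        queue.add(doc);
      } else if (queue.peek().score() < doc.score()) {
        queue.poll(); // evict the weakest, analogous to insertWithOverflow
        queue.add(doc);
      }
    }

    // pop worst-to-best, filling page positions from the back
    MiniDoc[] page = new MiniDoc[queue.size()];
    for (int i = page.length - 1; i >= 0; i--) page[i] = queue.poll();
    for (MiniDoc d : page) System.out.println(d);
    // prints a (3.0) then c (2.0); b and the duplicate "a" were dropped
  }
}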
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher)
    throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;

  // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
  // currently have an option to return sort field values. Because of this, we
  // take the documents given and re-derive the sort values.
  boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
  if (fsv) {
    Sort sort = rb.getSortSpec().getSort();
    SortField[] sortFields =
        sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
    NamedList sortVals = new NamedList(); // order is important for the sort fields
    Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field

    SolrIndexReader reader = searcher.getReader();
    SolrIndexReader[] readers = reader.getLeafReaders();
    SolrIndexReader subReader = reader;
    if (readers.length == 1) {
      // if there is a single segment, use that subReader and avoid looking up each time
      subReader = readers[0];
      readers = null;
    }
    int[] offsets = reader.getLeafOffsets();

    for (SortField sortField : sortFields) {
      int type = sortField.getType();
      if (type == SortField.SCORE || type == SortField.DOC) continue;

      FieldComparator comparator = null;
      FieldComparator[] comparators =
          (readers == null) ? null : new FieldComparator[readers.length];

      String fieldname = sortField.getField();
      FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);

      DocList docList = rb.getResults().docList;
      ArrayList<Object> vals = new ArrayList<Object>(docList.size());
      DocIterator it = rb.getResults().docList.iterator();

      int offset = 0;
      int idx = 0;

      while (it.hasNext()) {
        int doc = it.nextDoc();
        if (readers != null) {
          idx = SolrIndexReader.readerIndex(doc, offsets);
          subReader = readers[idx];
          offset = offsets[idx];
          comparator = comparators[idx];
        }

        if (comparator == null) {
          comparator = sortField.getComparator(1, 0);
          comparator = comparator.setNextReader(subReader, offset);
          if (comparators != null) comparators[idx] = comparator;
        }

        doc -= offset; // adjust for what segment this is in
        comparator.copy(0, doc);
        Object val = comparator.value(0);

        // Sortable float, double, int, long types all just use a string
        // comparator. For these, we need to put the type into a readable
        // format. One reason for this is that XML can't represent all
        // string values (or even all unicode code points).
        // indexedToReadable() should be a no-op and should
        // thus be harmless anyway (for all current field types, anyway).
        if (val instanceof String) {
          field.setValue((String) val);
          val = ft.toObject(field);
        }

        // Must do the same conversion when sorting by a
        // String field in Lucene, which returns the terms
        // data as BytesRef:
        if (val instanceof BytesRef) {
          field.setValue(((BytesRef) val).utf8ToString());
          val = ft.toObject(field);
        }

        vals.add(val);
      }

      sortVals.add(fieldname, vals);
    }

    rsp.add("sort_values", sortVals);
  }
}
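// ---------------------------------------------------------------------------
// Illustrative sketch (not Solr code): how doFieldSortValues maps a global
// docID onto a segment. Lucene numbers documents per segment; the composite
// reader exposes each leaf's starting offset, so locating a doc's segment is
// a binary search over those offsets, and the local id is (global - offset).
// The readerIndex helper here is a hypothetical stand-in for
// SolrIndexReader.readerIndex, and the offsets are made up.
// ---------------------------------------------------------------------------
public class LeafOffsetSketch {
  /** Returns the index of the leaf whose doc range contains global docID {@code doc}. */
  static int readerIndex(int doc, int[] offsets) {
    int lo = 0, hi = offsets.length - 1;
    while (lo < hi) {
      int mid = (lo + hi + 1) >>> 1;           // bias high so lo converges upward
      if (offsets[mid] <= doc) lo = mid; else hi = mid - 1;
    }
    return lo; // largest index with offsets[idx] <= doc
  }

  public static void main(String[] args) {
    int[] offsets = {0, 10, 25}; // three segments starting at docs 0, 10, 25
    int doc = 17;                // a global docID
    int idx = readerIndex(doc, offsets);
    int local = doc - offsets[idx]; // adjust for what segment this is in
    System.out.println("segment " + idx + ", local doc " + local);
    // prints: segment 1, local doc 7
  }
}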