コード例 #1
0
 protected void doPrefetch(ResponseBuilder rb) throws IOException {
   SolrQueryRequest req = rb.req;
   SolrQueryResponse rsp = rb.rsp;
   // pre-fetch returned documents
   if (!req.getParams().getBool(ShardParams.IS_SHARD, false)
       && rb.getResults().docList != null
       && rb.getResults().docList.size() <= 50) {
     // TODO: this may depend on the highlighter component (or other components?)
     SolrPluginUtils.optimizePreFetchDocs(rb.getResults().docList, rb.getQuery(), req, rsp);
   }
 }
コード例 #2
0
  private void returnFields(ResponseBuilder rb, ShardRequest sreq) {
    // Keep in mind that this could also be a shard in a multi-tiered system.
    // TODO: if a multi-tiered system, it seems like some requests
    // could/should bypass middlemen (like retrieving stored fields)
    // TODO: merge fsv to if requested

    if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
      boolean returnScores = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0;

      assert (sreq.responses.size() == 1);
      ShardResponse srsp = sreq.responses.get(0);
      SolrDocumentList docs =
          (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");

      String keyFieldName = rb.req.getSchema().getUniqueKeyField().getName();

      for (SolrDocument doc : docs) {
        Object id = doc.getFieldValue(keyFieldName);
        ShardDoc sdoc = rb.resultIds.get(id.toString());
        if (sdoc != null) {
          if (returnScores && sdoc.score != null) {
            doc.setField("score", sdoc.score);
          }
          rb._responseDocs.set(sdoc.positionInResponse, doc);
        }
      }
    }
  }
コード例 #3
0
  private void createRetrieveDocs(ResponseBuilder rb) {

    // TODO: in a system with nTiers > 2, we could be passed "ids" here
    // unless those requests always go to the final destination shard

    // for each shard, collect the documents for that shard.
    HashMap<String, Collection<ShardDoc>> shardMap = new HashMap<String, Collection<ShardDoc>>();
    for (ShardDoc sdoc : rb.resultIds.values()) {
      Collection<ShardDoc> shardDocs = shardMap.get(sdoc.shard);
      if (shardDocs == null) {
        shardDocs = new ArrayList<ShardDoc>();
        shardMap.put(sdoc.shard, shardDocs);
      }
      shardDocs.add(sdoc);
    }

    SchemaField uniqueField = rb.req.getSchema().getUniqueKeyField();

    // Now create a request for each shard to retrieve the stored fields
    for (Collection<ShardDoc> shardDocs : shardMap.values()) {
      ShardRequest sreq = new ShardRequest();
      sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;

      sreq.shards = new String[] {shardDocs.iterator().next().shard};

      sreq.params = new ModifiableSolrParams();

      // add original params
      sreq.params.add(rb.req.getParams());

      // no need for a sort, we already have order
      sreq.params.remove(CommonParams.SORT);

      // we already have the field sort values
      sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);

      // make sure that the id is returned for correlation.
      String fl = sreq.params.get(CommonParams.FL);
      if (fl != null) {
        fl = fl.trim();
        // currently, "score" is synonymous with "*,score" so
        // don't add "id" if the fl is empty or "score" or it would change the meaning.
        if (fl.length() != 0 && !"score".equals(fl) && !"*".equals(fl)) {
          sreq.params.set(CommonParams.FL, fl + ',' + uniqueField.getName());
        }
      }

      ArrayList<String> ids = new ArrayList<String>(shardDocs.size());
      for (ShardDoc shardDoc : shardDocs) {
        // TODO: depending on the type, we may need more tha a simple toString()?
        ids.add(shardDoc.id.toString());
      }
      sreq.params.add(ShardParams.IDS, StrUtils.join(ids, ','));

      rb.addRequest(this, sreq);
    }
  }
コード例 #4
0
  private void createMainQuery(ResponseBuilder rb) {
    ShardRequest sreq = new ShardRequest();
    sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS;

    sreq.params = new ModifiableSolrParams(rb.req.getParams());
    // TODO: base on current params or original params?

    // don't pass through any shards param
    sreq.params.remove(ShardParams.SHARDS);

    // set the start (offset) to 0 for each shard request so we can properly merge
    // results from the start.
    if (rb.shards_start > -1) {
      // if the client set shards.start set this explicitly
      sreq.params.set(CommonParams.START, rb.shards_start);
    } else {
      sreq.params.set(CommonParams.START, "0");
    }
    // TODO: should we even use the SortSpec?  That's obtained from the QParser, and
    // perhaps we shouldn't attempt to parse the query at this level?
    // Alternate Idea: instead of specifying all these things at the upper level,
    // we could just specify that this is a shard request.
    if (rb.shards_rows > -1) {
      // if the client set shards.rows set this explicity
      sreq.params.set(CommonParams.ROWS, rb.shards_rows);
    } else {
      sreq.params.set(
          CommonParams.ROWS, rb.getSortSpec().getOffset() + rb.getSortSpec().getCount());
    }

    // in this first phase, request only the unique key field
    // and any fields needed for merging.
    sreq.params.set(ResponseBuilder.FIELD_SORT_VALUES, "true");

    if ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
        || rb.getSortSpec().includesScore()) {
      sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
    } else {
      sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName());
    }

    rb.addRequest(this, sreq);
  }
コード例 #5
0
  @Override
  public void prepare(ResponseBuilder rb) throws IOException {

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
      return;
    }
    SolrQueryResponse rsp = rb.rsp;

    // Set field flags
    String fl = params.get(CommonParams.FL);
    int fieldFlags = 0;
    if (fl != null) {
      fieldFlags |= SolrPluginUtils.setReturnFields(fl, rsp);
    }
    rb.setFieldFlags(fieldFlags);

    String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);

    // get it from the response builder to give a different component a chance
    // to set it.
    String queryString = rb.getQueryString();
    if (queryString == null) {
      // this is the normal way it's set.
      queryString = params.get(CommonParams.Q);
      rb.setQueryString(queryString);
    }

    try {
      QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
      Query q = parser.getQuery();
      if (q == null) {
        // normalize a null query to a query that matches nothing
        q = new BooleanQuery();
      }
      rb.setQuery(q);
      rb.setSortSpec(parser.getSort(true));
      rb.setQparser(parser);

      String[] fqs = req.getParams().getParams(CommonParams.FQ);
      if (fqs != null && fqs.length != 0) {
        List<Query> filters = rb.getFilters();
        if (filters == null) {
          filters = new ArrayList<Query>();
          rb.setFilters(filters);
        }
        for (String fq : fqs) {
          if (fq != null && fq.trim().length() != 0) {
            QParser fqp = QParser.getParser(fq, null, req);
            filters.add(fqp.getQuery());
          }
        }
      }
    } catch (ParseException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }

    // TODO: temporary... this should go in a different component.
    String shards = params.get(ShardParams.SHARDS);
    if (shards != null) {
      List<String> lst = StrUtils.splitSmart(shards, ",", true);
      rb.shards = lst.toArray(new String[lst.size()]);
    }
    String shards_rows = params.get(ShardParams.SHARDS_ROWS);
    if (shards_rows != null) {
      rb.shards_rows = Integer.parseInt(shards_rows);
    }
    String shards_start = params.get(ShardParams.SHARDS_START);
    if (shards_start != null) {
      rb.shards_start = Integer.parseInt(shards_start);
    }
  }
コード例 #6
0
  private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
    SortSpec ss = rb.getSortSpec();
    Sort sort = ss.getSort();

    SortField[] sortFields = null;
    if (sort != null) sortFields = sort.getSort();
    else {
      sortFields = new SortField[] {SortField.FIELD_SCORE};
    }

    SchemaField uniqueKeyField = rb.req.getSchema().getUniqueKeyField();

    // id to shard mapping, to eliminate any accidental dups
    HashMap<Object, String> uniqueDoc = new HashMap<Object, String>();

    // Merge the docs via a priority queue so we don't have to sort *all* of the
    // documents... we only need to order the top (rows+start)
    ShardFieldSortedHitQueue queue;
    queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount());

    long numFound = 0;
    Float maxScore = null;
    for (ShardResponse srsp : sreq.responses) {
      SolrDocumentList docs =
          (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");

      // calculate global maxScore and numDocsFound
      if (docs.getMaxScore() != null) {
        maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
      }
      numFound += docs.getNumFound();

      NamedList sortFieldValues =
          (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values"));

      // go through every doc in this response, construct a ShardDoc, and
      // put it in the priority queue so it can be ordered.
      for (int i = 0; i < docs.size(); i++) {
        SolrDocument doc = docs.get(i);
        Object id = doc.getFieldValue(uniqueKeyField.getName());

        String prevShard = uniqueDoc.put(id, srsp.getShard());
        if (prevShard != null) {
          // duplicate detected
          numFound--;

          // For now, just always use the first encountered since we can't currently
          // remove the previous one added to the priority queue.  If we switched
          // to the Java5 PriorityQueue, this would be easier.
          continue;
          // make which duplicate is used deterministic based on shard
          // if (prevShard.compareTo(srsp.shard) >= 0) {
          //  TODO: remove previous from priority queue
          //  continue;
          // }
        }

        ShardDoc shardDoc = new ShardDoc();
        shardDoc.id = id;
        shardDoc.shard = srsp.getShard();
        shardDoc.orderInShard = i;
        Object scoreObj = doc.getFieldValue("score");
        if (scoreObj != null) {
          if (scoreObj instanceof String) {
            shardDoc.score = Float.parseFloat((String) scoreObj);
          } else {
            shardDoc.score = (Float) scoreObj;
          }
        }

        shardDoc.sortFieldValues = sortFieldValues;

        queue.insertWithOverflow(shardDoc);
      } // end for-each-doc-in-response
    } // end for-each-response

    // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
    // So we want to pop the last documents off the queue to get
    // the docs offset -> queuesize
    int resultSize = queue.size() - ss.getOffset();
    resultSize = Math.max(0, resultSize); // there may not be any docs in range

    Map<Object, ShardDoc> resultIds = new HashMap<Object, ShardDoc>();
    for (int i = resultSize - 1; i >= 0; i--) {
      ShardDoc shardDoc = (ShardDoc) queue.pop();
      shardDoc.positionInResponse = i;
      // Need the toString() for correlation with other lists that must
      // be strings (like keys in highlighting, explain, etc)
      resultIds.put(shardDoc.id.toString(), shardDoc);
    }

    SolrDocumentList responseDocs = new SolrDocumentList();
    if (maxScore != null) responseDocs.setMaxScore(maxScore);
    responseDocs.setNumFound(numFound);
    responseDocs.setStart(ss.getOffset());
    // size appropriately
    for (int i = 0; i < resultSize; i++) responseDocs.add(null);

    // save these results in a private area so we can access them
    // again when retrieving stored fields.
    // TODO: use ResponseBuilder (w/ comments) or the request context?
    rb.resultIds = resultIds;
    rb._responseDocs = responseDocs;
  }
コード例 #7
0
  protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher)
      throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;

    // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
    // currently have an option to return sort field values.  Because of this, we
    // take the documents given and re-derive the sort values.
    boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
    if (fsv) {
      Sort sort = rb.getSortSpec().getSort();
      SortField[] sortFields =
          sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
      NamedList sortVals = new NamedList(); // order is important for the sort fields
      Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO); // a dummy Field

      SolrIndexReader reader = searcher.getReader();
      SolrIndexReader[] readers = reader.getLeafReaders();
      SolrIndexReader subReader = reader;
      if (readers.length == 1) {
        // if there is a single segment, use that subReader and avoid looking up each time
        subReader = readers[0];
        readers = null;
      }
      int[] offsets = reader.getLeafOffsets();

      for (SortField sortField : sortFields) {
        int type = sortField.getType();
        if (type == SortField.SCORE || type == SortField.DOC) continue;

        FieldComparator comparator = null;
        FieldComparator comparators[] =
            (readers == null) ? null : new FieldComparator[readers.length];

        String fieldname = sortField.getField();
        FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);

        DocList docList = rb.getResults().docList;
        ArrayList<Object> vals = new ArrayList<Object>(docList.size());
        DocIterator it = rb.getResults().docList.iterator();

        int offset = 0;
        int idx = 0;

        while (it.hasNext()) {
          int doc = it.nextDoc();
          if (readers != null) {
            idx = SolrIndexReader.readerIndex(doc, offsets);
            subReader = readers[idx];
            offset = offsets[idx];
            comparator = comparators[idx];
          }

          if (comparator == null) {
            comparator = sortField.getComparator(1, 0);
            comparator = comparator.setNextReader(subReader, offset);
            if (comparators != null) comparators[idx] = comparator;
          }

          doc -= offset; // adjust for what segment this is in
          comparator.copy(0, doc);
          Object val = comparator.value(0);

          // Sortable float, double, int, long types all just use a string
          // comparator. For these, we need to put the type into a readable
          // format.  One reason for this is that XML can't represent all
          // string values (or even all unicode code points).
          // indexedToReadable() should be a no-op and should
          // thus be harmless anyway (for all current ways anyway)
          if (val instanceof String) {
            field.setValue((String) val);
            val = ft.toObject(field);
          }

          // Must do the same conversion when sorting by a
          // String field in Lucene, which returns the terms
          // data as BytesRef:
          if (val instanceof BytesRef) {
            field.setValue(((BytesRef) val).utf8ToString());
            val = ft.toObject(field);
          }

          vals.add(val);
        }

        sortVals.add(fieldname, vals);
      }

      rsp.add("sort_values", sortVals);
    }
  }
コード例 #8
0
  /** Actually run the query */
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
      return;
    }
    SolrIndexSearcher searcher = req.getSearcher();

    if (rb.getQueryCommand().getOffset() < 0) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
    }

    // -1 as flag if not set.
    long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1);

    // Optional: This could also be implemented by the top-level searcher sending
    // a filter that lists the ids... that would be transparent to
    // the request handler, but would be more expensive (and would preserve score
    // too if desired).
    String ids = params.get(ShardParams.IDS);
    if (ids != null) {
      SchemaField idField = req.getSchema().getUniqueKeyField();
      List<String> idArr = StrUtils.splitSmart(ids, ",", true);
      int[] luceneIds = new int[idArr.size()];
      int docs = 0;
      for (int i = 0; i < idArr.size(); i++) {
        int id =
            req.getSearcher()
                .getFirstMatch(
                    new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
        if (id >= 0) luceneIds[docs++] = id;
      }

      DocListAndSet res = new DocListAndSet();
      res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
      if (rb.isNeedDocSet()) {
        List<Query> queries = new ArrayList<Query>();
        queries.add(rb.getQuery());
        List<Query> filters = rb.getFilters();
        if (filters != null) queries.addAll(filters);
        res.docSet = searcher.getDocSet(queries);
      }
      rb.setResults(res);
      rsp.add("response", rb.getResults().docList);
      return;
    }

    SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
    cmd.setTimeAllowed(timeAllowed);
    SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();

    //
    // grouping / field collapsing
    //
    boolean doGroup = params.getBool(GroupParams.GROUP, false);
    if (doGroup) {
      try {
        cmd.groupCommands = new ArrayList<Grouping.Command>();

        String[] fields = params.getParams(GroupParams.GROUP_FIELD);
        String[] funcs = params.getParams(GroupParams.GROUP_FUNC);
        String[] queries = params.getParams(GroupParams.GROUP_QUERY);
        String groupSortStr = params.get(GroupParams.GROUP_SORT);
        Sort groupSort =
            groupSortStr != null ? QueryParsing.parseSort(groupSortStr, req.getSchema()) : null;

        int limitDefault = cmd.getLen(); // this is normally from "rows"
        int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1);

        // temporary: implement all group-by-field as group-by-func
        if (funcs == null) {
          funcs = fields;
        } else if (fields != null) {
          // catenate functions and fields
          String[] both = new String[fields.length + funcs.length];
          System.arraycopy(fields, 0, both, 0, fields.length);
          System.arraycopy(funcs, 0, both, fields.length, funcs.length);
          funcs = both;
        }

        if (funcs != null) {
          for (String groupByStr : funcs) {
            QParser parser = QParser.getParser(groupByStr, "func", rb.req);
            Query q = parser.getQuery();
            Grouping.CommandFunc gc = new Grouping.CommandFunc();
            gc.groupSort = groupSort;

            if (q instanceof FunctionQuery) {
              gc.groupBy = ((FunctionQuery) q).getValueSource();
            } else {
              gc.groupBy = new QueryValueSource(q, 0.0f);
            }
            gc.key = groupByStr;
            gc.groupLimit = limitDefault;
            gc.docsPerGroup = docsPerGroupDefault;

            cmd.groupCommands.add(gc);
          }
        }

        if (cmd.groupCommands.size() == 0) cmd.groupCommands = null;

        if (cmd.groupCommands != null) {
          if (rb.doHighlights || rb.isDebug()) {
            // we need a single list of the returned docs
            cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
          }

          searcher.search(result, cmd);
          rb.setResult(result);
          rsp.add("grouped", result.groupedResults);
          // TODO: get "hits" a different way to log
          return;
        }
      } catch (ParseException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }
    }

    // normal search result
    searcher.search(result, cmd);
    rb.setResult(result);

    rsp.add("response", rb.getResults().docList);
    rsp.getToLog().add("hits", rb.getResults().docList.matches());

    doFieldSortValues(rb, searcher);
    doPrefetch(rb);
  }