/**
   * Expert: highlights the top-N passages of several fields for the given document ids, returning
   * whatever custom Object the {@link PassageFormatter} produces. Use this entry point when the
   * rendered output is something other than a String.
   *
   * <p>The three input arrays are copied before being sorted, so the caller's arrays are never
   * reordered. Document ids are processed in ascending order, but each returned array is laid out
   * in the order of <code>docidsIn</code>.
   *
   * @param fieldsIn field names to highlight. Must have a stored string value and also be indexed
   *     with offsets.
   * @param query query to highlight.
   * @param searcher searcher that was previously used to execute the query.
   * @param docidsIn containing the document IDs to highlight.
   * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to form the
   *     highlighted snippets; parallel to <code>fieldsIn</code>.
   * @return Map keyed on field name, containing the array of formatted snippets corresponding to
   *     the documents in <code>docidsIn</code>. If no highlights were found for a document, the
   *     first {@code maxPassagesIn} passages from the field will be returned.
   * @throws IOException if an I/O error occurred during processing
   * @throws IllegalArgumentException if <code>fieldsIn</code> is empty, if <code>fieldsIn</code>
   *     and <code>maxPassagesIn</code> differ in length, or if a field was indexed without {@link
   *     IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
   */
  protected Map<String, Object[]> highlightFieldsAsObjects(
      String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[])
      throws IOException {
    if (fieldsIn.length < 1) {
      throw new IllegalArgumentException("fieldsIn must not be empty");
    }
    if (maxPassagesIn.length != fieldsIn.length) {
      throw new IllegalArgumentException("invalid number of maxPassagesIn");
    }

    final IndexReader indexReader = searcher.getIndexReader();

    // Gather every term of the rewritten query in sorted order, so that the terms belonging to a
    // single field can later be sliced out as one contiguous range.
    final SortedSet<Term> allTerms = new TreeSet<>();
    rewrite(query).extractTerms(allTerms);

    final List<AtomicReaderContext> segments = indexReader.getContext().leaves();

    // Work on private copies: everything below sorts in place.
    final int[] sortedDocs = docidsIn.clone();
    final String[] sortedFields = fieldsIn.clone();
    final int[] passageLimits = maxPassagesIn.clone();

    // Ascending doc ids give sequential io.
    Arrays.sort(sortedDocs);

    // Order the fields by name, moving each field's passage limit along with it.
    new InPlaceMergeSorter() {

      @Override
      protected int compare(int a, int b) {
        return sortedFields[a].compareTo(sortedFields[b]);
      }

      @Override
      protected void swap(int a, int b) {
        final String heldField = sortedFields[a];
        sortedFields[a] = sortedFields[b];
        sortedFields[b] = heldField;

        final int heldLimit = passageLimits[a];
        passageLimits[a] = passageLimits[b];
        passageLimits[b] = heldLimit;
      }
    }.sort(0, sortedFields.length);

    // Pull the stored field values for all fields and documents.
    final String[][] storedValues = loadFieldValues(searcher, sortedFields, sortedDocs, maxLength);

    final Map<String, Object[]> formattedByField = new HashMap<>();
    for (int f = 0; f < sortedFields.length; f++) {
      final String name = sortedFields[f];
      final int limit = passageLimits[f];

      // Half-open range [ (name, ""), (name, BIG_TERM) ) selects this field's query terms.
      final SortedSet<Term> termsOfField =
          allTerms.subSet(new Term(name, ""), new Term(name, UnicodeUtil.BIG_TERM));
      // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

      // Keep only the term bytes; the field name is redundant from here on.
      final BytesRef[] termBytes = new BytesRef[termsOfField.size()];
      int next = 0;
      for (Term t : termsOfField) {
        termBytes[next] = t.bytes();
        next++;
      }

      final Map<Integer, Object> byDocId =
          highlightField(
              name,
              storedValues[f],
              getBreakIterator(name),
              termBytes,
              sortedDocs,
              segments,
              limit,
              query);

      // Lay the results out in the caller's document order rather than the sorted order.
      final Object[] inCallerOrder = new Object[sortedDocs.length];
      for (int d = 0; d < docidsIn.length; d++) {
        inCallerOrder[d] = byDocId.get(docidsIn[d]);
      }
      formattedByField.put(name, inCallerOrder);
    }
    return formattedByField;
  }
Ejemplo n.º 2
0
    /**
     * Re-derives the sort values of the documents in the current result list and adds them to the
     * response under the key {@code "merge_values"}.
     *
     * <p>Nothing is done unless the request parameter {@link ResponseBuilder#FIELD_SORT_VALUES} is
     * true. Otherwise, for every sort field that is neither SCORE nor DOC, the value of each result
     * document is read through that field's comparator and stored at the document's position in
     * the result list. Documents are visited in ascending doc id order, and a comparator is built
     * only when the visit moves to a different index segment.
     *
     * @param rb response builder supplying the request, the response, the result list and the
     *     sort spec
     * @param searcher searcher whose top-level reader context and sort weighting are used
     * @throws IOException propagated from the comparator calls
     */
    public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher)
        throws IOException {
      SolrQueryRequest req = rb.req;
      SolrQueryResponse rsp = rb.rsp;
      // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
      // currently have an option to return sort field values.  Because of this, we
      // take the documents given and re-derive the sort values.
      //
      // TODO: See SOLR-5595
      boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
      if (fsv) {
        NamedList<Object[]> sortVals = new NamedList<>(); // order is important for the sort fields
        IndexReaderContext topReaderContext = searcher.getTopReaderContext();
        List<LeafReaderContext> leaves = topReaderContext.leaves();
        LeafReaderContext currentLeaf = null;
        if (leaves.size() == 1) {
          // if there is a single segment, use that subReader and avoid looking up each time
          currentLeaf = leaves.get(0);
          leaves = null;
        }

        DocList docList = rb.getResults().docList;

        // sort ids from lowest to highest so we can access them in order
        int nDocs = docList.size();
        final long[] sortedIds = new long[nDocs];
        final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
        final boolean hasScores = docList.hasScores();
        DocIterator it = docList.iterator();
        for (int i = 0; i < nDocs; i++) {
          // Pack (doc id, position in the result list) into one long: sorting the longs orders by
          // doc id while the low 32 bits remember where the document sat in the list.
          sortedIds[i] = (((long) it.nextDoc()) << 32) | i;
          scores[i] = hasScores ? it.score() : Float.NaN;
        }

        // sort ids and scores together
        new InPlaceMergeSorter() {
          @Override
          protected void swap(int i, int j) {
            long tmpId = sortedIds[i];
            float tmpScore = scores[i];
            sortedIds[i] = sortedIds[j];
            scores[i] = scores[j];
            sortedIds[j] = tmpId;
            scores[j] = tmpScore;
          }

          @Override
          protected int compare(int i, int j) {
            return Long.compare(sortedIds[i], sortedIds[j]);
          }
        }.sort(0, sortedIds.length);

        SortSpec sortSpec = rb.getSortSpec();
        Sort sort = searcher.weightSort(sortSpec.getSort());
        SortField[] sortFields =
            sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
        List<SchemaField> schemaFields = sortSpec.getSchemaFields();

        for (int fld = 0; fld < schemaFields.size(); fld++) {
          SchemaField schemaField = schemaFields.get(fld);
          FieldType ft = null == schemaField ? null : schemaField.getType();
          SortField sortField = sortFields[fld];

          SortField.Type type = sortField.getType();
          // :TODO: would be simpler to always serialize every position of SortField[]
          if (type == SortField.Type.SCORE || type == SortField.Type.DOC) {
            continue;
          }

          FieldComparator<?> comparator = null;
          LeafFieldComparator leafComparator = null;
          Object[] vals = new Object[nDocs];

          // Index of the segment the comparator was last built for; -1 means "none yet".
          int lastIdx = -1;

          for (int i = 0; i < sortedIds.length; ++i) {
            long idAndPos = sortedIds[i];
            float score = scores[i];
            int doc = (int) (idAndPos >>> 32); // high 32 bits: top-level doc id
            int position = (int) idAndPos; // low 32 bits: position in the result list

            if (leaves != null) {
              int idx = ReaderUtil.subIndex(doc, leaves);
              currentLeaf = leaves.get(idx);
              if (idx != lastIdx) {
                // we switched segments.  invalidate comparator, and remember the new segment so
                // that later documents from the same segment reuse the comparator instead of
                // rebuilding it for every single document.
                lastIdx = idx;
                comparator = null;
              }
            }

            if (comparator == null) {
              comparator = sortField.getComparator(1, 0);
              leafComparator = comparator.getLeafComparator(currentLeaf);
            }

            doc -= currentLeaf.docBase; // adjust for what segment this is in
            leafComparator.setScorer(new FakeScorer(doc, score));
            leafComparator.copy(0, doc);
            Object val = comparator.value(0);
            if (null != ft) {
              val = ft.marshalSortValue(val);
            }
            vals[position] = val;
          }

          sortVals.add(sortField.getField(), vals);
        }

        rsp.add("merge_values", sortVals);
      }
    }