/**
 * Expert: highlights the top-N passages from multiple fields, for the provided int[] docids, to
 * custom Object as returned by the {@link PassageFormatter}. Use this API to render to something
 * other than String.
 *
 * @param fieldsIn field names to highlight. Must have a stored string value and also be indexed
 *     with offsets.
 * @param query query to highlight.
 * @param searcher searcher that was previously used to execute the query.
 * @param docidsIn containing the document IDs to highlight.
 * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to form the
 *     highlighted snippets.
 * @return Map keyed on field name, containing the array of formatted snippets corresponding to
 *     the documents in <code>docidsIn</code>. If no highlights were found for a document, the
 *     first {@code maxPassages} sentences from the field will be returned.
 * @throws IOException if an I/O error occurred during processing
 * @throws IllegalArgumentException if <code>field</code> was indexed without {@link
 *     IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
 */
protected Map<String, Object[]> highlightFieldsAsObjects(
    String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[])
    throws IOException {
  if (fieldsIn.length < 1) {
    throw new IllegalArgumentException("fieldsIn must not be empty");
  }
  if (fieldsIn.length != maxPassagesIn.length) {
    throw new IllegalArgumentException("invalid number of maxPassagesIn");
  }
  final IndexReader reader = searcher.getIndexReader();
  Query rewritten = rewrite(query);
  SortedSet<Term> queryTerms = new TreeSet<>();
  rewritten.extractTerms(queryTerms);

  IndexReaderContext readerContext = reader.getContext();
  List<AtomicReaderContext> leaves = readerContext.leaves();

  // Make our own copies because we sort in-place:
  int[] docids = new int[docidsIn.length];
  System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
  final String fields[] = new String[fieldsIn.length];
  System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
  final int maxPassages[] = new int[maxPassagesIn.length];
  System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);

  // sort for sequential io
  Arrays.sort(docids);
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      String tmp = fields[i];
      fields[i] = fields[j];
      fields[j] = tmp;
      int tmp2 = maxPassages[i];
      maxPassages[i] = maxPassages[j];
      maxPassages[j] = tmp2;
    }

    @Override
    protected int compare(int i, int j) {
      return fields[i].compareTo(fields[j]);
    }
  }.sort(0, fields.length);

  // pull stored data:
  String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);

  Map<String, Object[]> highlights = new HashMap<>();
  for (int i = 0; i < fields.length; i++) {
    String field = fields[i];
    int numPassages = maxPassages[i];
    Term floor = new Term(field, "");
    Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
    SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
    // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

    // Strip off the redundant field:
    BytesRef terms[] = new BytesRef[fieldTerms.size()];
    int termUpto = 0;
    for (Term term : fieldTerms) {
      terms[termUpto++] = term.bytes();
    }

    Map<Integer, Object> fieldHighlights =
        highlightField(
            field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages, query);

    Object[] result = new Object[docids.length];
    for (int j = 0; j < docidsIn.length; j++) {
      result[j] = fieldHighlights.get(docidsIn[j]);
    }
    highlights.put(field, result);
  }
  return highlights;
}
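// A minimal usage sketch, not part of the class above. It assumes highlightFieldsAsObjects
// lives on Lucene's PostingsHighlighter (or a similar highlighter that exposes it as
// protected), so a subclass is used to reach it and to plug in a PassageFormatter that
// returns a custom object per snippet. The class names, field names ("title", "body") and
// passage counts below are made up for illustration.

import java.io.IOException;
import java.util.Map;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.postingshighlight.Passage;
import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;

public class ObjectHighlighterExample extends PostingsHighlighter {

  // Hypothetical value type: raw passage offsets instead of a formatted String.
  public static final class OffsetSnippet {
    public final int start, end;

    public OffsetSnippet(int start, int end) {
      this.start = start;
      this.end = end;
    }
  }

  @Override
  protected PassageFormatter getFormatter(String field) {
    return new PassageFormatter() {
      @Override
      public Object format(Passage[] passages, String content) {
        // One object per highlighted field value; here just the offsets of the
        // first passage, or null when nothing matched.
        if (passages.length == 0) {
          return null;
        }
        return new OffsetSnippet(passages[0].getStartOffset(), passages[0].getEndOffset());
      }
    };
  }

  public Map<String, Object[]> highlight(Query query, IndexSearcher searcher, int[] docids)
      throws IOException {
    // Up to 1 passage for "title" and 2 for "body"; the two arrays are parallel.
    return highlightFieldsAsObjects(
        new String[] {"title", "body"}, query, searcher, docids, new int[] {1, 2});
  }
}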
public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
  // currently have an option to return sort field values. Because of this, we
  // take the documents given and re-derive the sort values.
  //
  // TODO: See SOLR-5595
  boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
  if (fsv) {
    NamedList<Object[]> sortVals = new NamedList<>(); // order is important for the sort fields
    IndexReaderContext topReaderContext = searcher.getTopReaderContext();
    List<LeafReaderContext> leaves = topReaderContext.leaves();
    LeafReaderContext currentLeaf = null;
    if (leaves.size() == 1) {
      // if there is a single segment, use that subReader and avoid looking up each time
      currentLeaf = leaves.get(0);
      leaves = null;
    }

    DocList docList = rb.getResults().docList;

    // sort ids from lowest to highest so we can access them in order
    int nDocs = docList.size();
    final long[] sortedIds = new long[nDocs];
    final float[] scores = new float[nDocs]; // doc scores, parallel to sortedIds
    DocList docs = rb.getResults().docList;
    DocIterator it = docs.iterator();
    for (int i = 0; i < nDocs; i++) {
      sortedIds[i] = (((long) it.nextDoc()) << 32) | i;
      scores[i] = docs.hasScores() ? it.score() : Float.NaN;
    }

    // sort ids and scores together
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        long tmpId = sortedIds[i];
        float tmpScore = scores[i];
        sortedIds[i] = sortedIds[j];
        scores[i] = scores[j];
        sortedIds[j] = tmpId;
        scores[j] = tmpScore;
      }

      @Override
      protected int compare(int i, int j) {
        return Long.compare(sortedIds[i], sortedIds[j]);
      }
    }.sort(0, sortedIds.length);

    SortSpec sortSpec = rb.getSortSpec();
    Sort sort = searcher.weightSort(sortSpec.getSort());
    SortField[] sortFields =
        sort == null ? new SortField[] {SortField.FIELD_SCORE} : sort.getSort();
    List<SchemaField> schemaFields = sortSpec.getSchemaFields();

    for (int fld = 0; fld < schemaFields.size(); fld++) {
      SchemaField schemaField = schemaFields.get(fld);
      FieldType ft = null == schemaField ? null : schemaField.getType();
      SortField sortField = sortFields[fld];

      SortField.Type type = sortField.getType();
      // :TODO: would be simpler to always serialize every position of SortField[]
      if (type == SortField.Type.SCORE || type == SortField.Type.DOC) continue;

      FieldComparator<?> comparator = null;
      LeafFieldComparator leafComparator = null;
      Object[] vals = new Object[nDocs];

      int lastIdx = -1;
      int idx = 0;

      for (int i = 0; i < sortedIds.length; ++i) {
        long idAndPos = sortedIds[i];
        float score = scores[i];
        int doc = (int) (idAndPos >>> 32);
        int position = (int) idAndPos;

        if (leaves != null) {
          idx = ReaderUtil.subIndex(doc, leaves);
          currentLeaf = leaves.get(idx);
          if (idx != lastIdx) {
            // we switched segments. invalidate comparator.
            lastIdx = idx; // remember the new segment so the comparator is only rebuilt once per segment
            comparator = null;
          }
        }

        if (comparator == null) {
          comparator = sortField.getComparator(1, 0);
          leafComparator = comparator.getLeafComparator(currentLeaf);
        }

        doc -= currentLeaf.docBase; // adjust for what segment this is in
        leafComparator.setScorer(new FakeScorer(doc, score));
        leafComparator.copy(0, doc);
        Object val = comparator.value(0);
        if (null != ft) val = ft.marshalSortValue(val);
        vals[position] = val;
      }

      sortVals.add(sortField.getField(), vals);
    }

    rsp.add("merge_values", sortVals);
  }
}
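// A standalone sketch (not part of the handler above) of the id-packing trick used for
// sortedIds: each long stores the docid in the high 32 bits and the original result
// position in the low 32 bits, so sorting the longs visits documents in docid order
// (sequential I/O) while the low bits remember where each derived value belongs in the
// response. The docids and the produced values below are made up for illustration.

import java.util.Arrays;

public class PackedIdDemo {
  public static void main(String[] args) {
    int[] docs = {42, 7, 19}; // docids in result (response) order
    long[] sortedIds = new long[docs.length];
    for (int i = 0; i < docs.length; i++) {
      sortedIds[i] = (((long) docs[i]) << 32) | i; // pack docid (high) + position (low)
    }
    Arrays.sort(sortedIds); // now ordered by docid for sequential access

    Object[] vals = new Object[docs.length];
    for (long idAndPos : sortedIds) {
      int doc = (int) (idAndPos >>> 32); // recover docid
      int position = (int) idAndPos; // recover original result position
      vals[position] = "value-for-doc-" + doc; // write back in the original order
    }
    // Prints [value-for-doc-42, value-for-doc-7, value-for-doc-19]: response order preserved.
    System.out.println(Arrays.toString(vals));
  }
}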