private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { SolrParams params = req.getParams(); String[] docTexts = doc.getValues(fieldName); // according to Document javadoc, doc.getValues() never returns null. check empty instead of // null if (docTexts.length == 0) return; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); TokenStream tstream = null; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); String[] summaries = null; List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization try { TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName); if (tvStream != null) { tots = new TermOffsetsTokenStream(tvStream); } } catch (IllegalArgumentException e) { // No problem. But we can't use TermOffsets optimization. } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. get field j's TokenStream) from tots: tstream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { // TODO: this is not always necessary - eventually we would like to avoid this wrap // when it is not needed. tstream = new CachingTokenFilter(tstream); // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream); // after highlighter initialization, reset tstream since construction of highlighter already // used it tstream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } int maxCharsToAnalyze = params.getFieldInt( fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments( tstream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first Collections.sort( frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } if (fragTexts.size() >= numFragments) break; } summaries = fragTexts.toArray(new String[0]); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return; // END: Hack SolrParams params = req.getParams(); IndexableField[] docFields = doc.getFields(fieldName); List<String> listFields = new ArrayList<String>(); for (IndexableField field : docFields) { listFields.add(field.stringValue()); } // preserve order of values in a multiValued list boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false); String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]); // according to Document javadoc, doc.getValues() never returns null. check empty instead of // null if (docTexts.length == 0) return; TokenStream tstream = null; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); String[] summaries = null; List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization TokenStream tvStream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName); if (tvStream != null) { tots = new TermOffsetsTokenStream(tvStream); } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. get field j's TokenStream) from tots: tstream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } int maxCharsToAnalyze = params.getFieldInt( fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tstream = new CachingTokenFilter(tstream); } else { tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream); // after highlighter initialization, reset tstream since construction of highlighter already // used it tstream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments( tstream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if (preserveMulti) { if (bestTextFragments[k] != null) { frags.add(bestTextFragments[k]); } } else { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first if (!preserveMulti) { Collections.sort( frags, new Comparator<TextFragment>() { @Override public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); } // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if (preserveMulti) { if (fragment != null) { fragTexts.add(fragment.toString()); } } else { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } } if (fragTexts.size() >= numFragments && !preserveMulti) break; } summaries = fragTexts.toArray(new String[0]); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }