/**
 * Indexes one document whose {@code content} field is stored with term vectors (positions and
 * offsets), looks it up by {@code _id}, and verifies that the fast vector highlighter returns a
 * non-null fragment for a term query on {@code content}.
 */
@Test
public void testVectorHighlighter() throws Exception {
  Directory directory = new RAMDirectory();
  IndexWriter writer =
      new IndexWriter(
          directory, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);

  writer.addDocument(
      doc()
          .add(field("_id", "1"))
          .add(
              field(
                  "content",
                  "the big bad dog",
                  Field.Store.YES,
                  Field.Index.ANALYZED,
                  Field.TermVector.WITH_POSITIONS_OFFSETS))
          .build());

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = new IndexSearcher(reader);

  // Locate the single indexed document by its id.
  TermQuery idQuery = new TermQuery(new Term("_id", "1"));
  TopDocs hits = searcher.search(idQuery, 1);
  assertThat(hits.totalHits, equalTo(1));

  // Highlight the term "bad" in the content field of that document.
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  TermQuery contentQuery = new TermQuery(new Term("content", "bad"));
  String highlighted =
      highlighter.getBestFragment(
          highlighter.getFieldQuery(contentQuery), reader, hits.scoreDocs[0].doc, "content", 30);

  assertThat(highlighted, notNullValue());
  System.out.println(highlighted);
}
/** * Generates a list of Highlighted query fragments for each item in a list of documents, or * returns null if highlighting is disabled. * * @param docs query results * @param query the query * @param req the current request * @param defaultFields default list of fields to summarize * @return NamedList containing a NamedList for each document, which in turns contains sets * (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting( DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); NamedList fragments = new SimpleOrderedMap(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> fset = new HashSet<String>(); { // pre-fetch documents using the Searcher's doc cache for (String f : fieldNames) { fset.add(f); } // fetch unique key if one exists. 
SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); } // get FastVectorHighlighter instance out of the processing loop FastVectorHighlighter fvh = new FastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter per-field basis params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool(HighlightParams.FIELD_MATCH, false)); fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader()); // Highlight each document DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = searcher.doc(docId, fset); NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); if (useFastVectorHighlighter(params, schema, fieldName)) doHighlightingByFastVectorHighlighter( fvh, fieldQuery, req, docSummaries, docId, doc, fieldName); else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName); } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } return fragments; }
/**
 * Highlights a single field of a single document with the supplied FastVectorHighlighter and
 * records the outcome in {@code docSummaries}.
 *
 * <p>If the highlighter returns at least one snippet, the snippet array is added under
 * {@code fieldName}. Otherwise {@code alternateField} is invoked for the field.
 *
 * @param highlighter the highlighter used to build the snippets
 * @param fieldQuery the field query passed through to the highlighter
 * @param req the current request; supplies the parameters and the index reader
 * @param docSummaries receives the snippets for this field
 * @param docId id of the document being highlighted
 * @param doc the document, handed to {@code alternateField} when there are no snippets
 * @param fieldName name of the field to highlight
 * @throws IOException propagated from the highlighter or the helpers it is given
 */
private void doHighlightingByFastVectorHighlighter(
    FastVectorHighlighter highlighter,
    FieldQuery fieldQuery,
    SolrQueryRequest req,
    NamedList docSummaries,
    int docId,
    Document doc,
    String fieldName)
    throws IOException {
  SolrParams params = req.getParams();
  SolrFragmentsBuilder tagSource = getSolrFragmentsBuilder(fieldName, params);

  // Per-field fragment size and snippet count, defaulting to 100 and 1 respectively.
  int fragmentSize = params.getFieldInt(fieldName, HighlightParams.FRAGSIZE, 100);
  int maxSnippets = params.getFieldInt(fieldName, HighlightParams.SNIPPETS, 1);

  String[] fragments =
      highlighter.getBestFragments(
          fieldQuery,
          req.getSearcher().getIndexReader(),
          docId,
          fieldName,
          fragmentSize,
          maxSnippets,
          getFragListBuilder(fieldName, params),
          getFragmentsBuilder(fieldName, params),
          tagSource.getPreTags(params, fieldName),
          tagSource.getPostTags(params, fieldName),
          getEncoder(fieldName, params));

  // Nothing highlighted: defer to the alternate-field handling.
  if (fragments == null || fragments.length == 0) {
    alternateField(docSummaries, params, doc, fieldName);
    return;
  }
  docSummaries.add(fieldName, fragments);
}
public List<LuceneSearchResult> search(String keyword, boolean highlight, int size) throws IOException, ParseException { IndexReader reader = luceneIndex.reader(); try { IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new IKAnalyzer(); MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_4_9, fields, analyzer, boosts); // 将关键字包装成Query对象 Query query = parser.parse(keyword); TopDocs results = searcher.search(query, size); FragListBuilder fragListBuilder = new SimpleFragListBuilder(); FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder( BaseFragmentsBuilder.COLORED_PRE_TAGS, BaseFragmentsBuilder.COLORED_POST_TAGS); FastVectorHighlighter fvh = new FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder); FieldQuery fq = fvh.getFieldQuery(query); // System.out.println("命中--》" + results.totalHits); List<LuceneSearchResult> searchResults = new ArrayList<LuceneSearchResult>(); for (ScoreDoc sd : results.scoreDocs) { // 当查询不到高亮信息时,返回内容为Null // String highContent = fvh.getBestFragment(fq, reader, sd.doc, "content", 100); // System.out.println("highContent-->" + highContent); String highTitle = null; if (highlight) { fvh.getBestFragment(fq, reader, sd.doc, "title", 100); if (highTitle == null) { Document doc = searcher.doc(sd.doc); /** 如果高亮内容为null,那么表示标题没有需要高亮的内容,那么赋值为原有标题 */ highTitle = doc.get("title"); } } else { highTitle = searcher.doc(sd.doc).get("title"); } String id = searcher.doc(sd.doc).get("id"); searchResults.add(new LuceneSearchResult(id, highTitle)); } return searchResults; } finally { reader.close(); } }
@Test public void testVectorHighlighterPrefixQuery() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED); indexWriter.addDocument( doc() .add(field("_id", "1")) .add( field( "content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)) .build()); IndexReader reader = indexWriter.getReader(); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); assertThat(topDocs.totalHits, equalTo(1)); FastVectorHighlighter highlighter = new FastVectorHighlighter(); PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba")); assertThat( prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName())); String fragment = highlighter.getBestFragment( highlighter.getFieldQuery(prefixQuery), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, nullValue()); prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE); Query rewriteQuery = prefixQuery.rewrite(reader); fragment = highlighter.getBestFragment( highlighter.getFieldQuery(rewriteQuery), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, notNullValue()); System.out.println(fragment); // now check with the custom field query prefixQuery = new PrefixQuery(new Term("content", "ba")); assertThat( prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName())); CustomFieldQuery.reader.set(reader); fragment = highlighter.getBestFragment( new CustomFieldQuery(prefixQuery, highlighter), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, notNullValue()); System.out.println(fragment); }