/** * 设置关键字高亮 * * @param query 查询对象 * @param list 设置高亮的内容列表 * @param subLength 截取长度 * @param fields 字段名 */ public List<T> keywordsHighlight( BooleanQuery query, List<T> list, int subLength, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(subLength)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); // 设置高亮字段 String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2)); } } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
public static String hightlightFeild( Highlighter highlighter, Analyzer analyzer, Document doc, String feild) { String docContent = doc.get(feild); try { String hc = highlighter.getBestFragment(analyzer, feild, docContent); if (hc == null) { if (docContent.length() >= 50) { hc = docContent.substring(0, 50); } else { hc = docContent; } } doc.getField(feild).setValue(hc); // System.out.println(hc); return hc; } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return docContent; }
public void heighlight(String field, String searchText) { String text = "In this section we'll show you how to make the simplest " + "programmatic query, searching for a single term, and then " + "we'll see how to use QueryParser to accept textual queries. " + "In the sections that follow, we’ll take this simple example " + "further by detailing all the query types built into Lucene. " + "We begin with the simplest search of all: searching for all " + "documents that contain a single term."; Analyzer analyzer = new StandardAnalyzer(); QueryParser queryParser = new QueryParser(field, analyzer); try { Query query = queryParser.parse(searchText); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); TokenStream tokens = analyzer.tokenStream("f", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "f"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); String result = highlighter.getBestFragments(tokens, text, 3, "..."); FileWriter writer = new FileWriter("/home/venugopal/Downloads/Lucene/lia/highter.html"); // #8 writer.write("<html>"); // #8 writer.write( "<style>\n" + // #8 ".highlight {\n" + // #8 " background: yellow;\n" + // #8 "}\n" + // #8 "</style>"); // #8 writer.write("<body>"); // #8 writer.write(result); // #8 writer.write("</body></html>"); // #8 writer.close(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/** * 高亮设置 * * @param query * @param doc * @param field * @return */ private String toHighlighter(Analyzer analyzer, Query query, Document doc) { String field = "text"; try { SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(doc.get(field))); String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field)); return highlighterStr == null ? doc.get(field) : highlighterStr; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
public String getContext(String sample) throws IOException { String result = ""; try { String highlight_query_str = cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes; String query_str = "padded_length:[" + String.format("%09d", cfg.minDocLen) + " TO *]"; if (cfg.enableTitleSearch) { query_str += " AND (title:" + cfg.quotes + sample + cfg.quotes + " OR " + cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes + ")"; } else { query_str += " AND (" + cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes + ")"; } Query query = parser.parse(query_str); Query highlight_query = parser.parse(highlight_query_str); if (cfg.debug == true) System.out.println("Searching (" + query + ")....."); TopDocs topDocs = searcher.search(query, cfg.maxHits != 0 ? cfg.maxHits : Integer.MAX_VALUE); if (topDocs.totalHits > 0) { ScoreDoc[] hits = topDocs.scoreDocs; if (cfg.debug == true) System.out.println("Results (" + hits.length + ") :)"); String data; int indx; SimpleHTMLFormatter htmlFormatter = null; Highlighter highlighter = null; if (cfg.displayHighlights) { htmlFormatter = new SimpleHTMLFormatter(); highlighter = new Highlighter(htmlFormatter, new QueryScorer(highlight_query)); } for (int i = 0; i < hits.length; i++) { if (cfg.displayDID) { result += String.format("\t%d", hits[i].doc); } if (cfg.displayScore) { result += String.format("\t%f", hits[i].score); } if (cfg.displayLen) { result += "\t" + indexReader.document(hits[i].doc).getField("length").stringValue(); } if (cfg.displayTitle) { data = indexReader.document(hits[i].doc).getField("title").stringValue(); if (cfg.removeParen && (indx = data.indexOf(" (")) != -1) data = indexReader .document(hits[i].doc) .getField("title") .stringValue() .substring(0, indx); result += "\t" + data; } if (cfg.displayTxt || cfg.displayHighlights) { String text = indexReader.document(hits[i].doc).getField("text").stringValue(); if (cfg.displayTxt) result += "\t" + text; if (cfg.displayHighlights) { TokenStream tokenStream = TokenSources.getAnyTokenStream( searcher.getIndexReader(), hits[i].doc, "text", stdAnalyzer); TextFragment[] frag; try { frag = highlighter.getBestTextFragments(tokenStream, text, false, 10); for (int j = 0; j < frag.length; j++) { if ((frag[j] != null) && (frag[j].getScore() > 0)) { result += "\t" + (frag[j].toString()); } } } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } // highlighter.getBestFragments(tokenStream, text, 3, "..."); } } if (cfg.displayCategories) { IndexableField categories[] = indexReader.document(hits[i].doc).getFields("category"); for (int j = 0; j < categories.length && (cfg.numCategories == 0 || j < cfg.numCategories); j++) { result += "\t" + categories[j].stringValue(); } } result += System.lineSeparator() + System.lineSeparator() + System.lineSeparator(); } } else if (cfg.debug == true) System.out.println("No results found :("); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } // return result; }