/**
 * Builds a highlighted snippet for {@code s} by scoring {@code query} against the given
 * {@code field} and joining the best fragments.
 *
 * @param query the query whose terms are highlighted
 * @param field the indexed field the text belongs to
 * @param s the raw text to extract the snippet from
 * @param maxNumFragments maximum number of fragments to join
 * @param fragmentLength target size of each fragment
 * @param fragmentSuffix separator appended between fragments (and after a truncated snippet)
 * @param formatter formatter that wraps each matched term
 * @return the snippet, with {@code fragmentSuffix} appended when the text was truncated
 * @throws IOException if tokenization fails or token offsets are invalid
 */
@Override
public String getSnippet(
    Query query,
    String field,
    String s,
    int maxNumFragments,
    int fragmentLength,
    String fragmentSuffix,
    Formatter formatter)
  throws IOException {

  Highlighter fragmentHighlighter = new Highlighter(formatter, new QueryScorer(query, field));
  fragmentHighlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

  TokenStream tokens = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

  try {
    String result = fragmentHighlighter.getBestFragments(tokens, s, maxNumFragments, fragmentSuffix);

    // Append the suffix only when the snippet is a genuine truncation of the source
    // text and does not already end with the suffix.
    boolean truncated =
        Validator.isNotNull(result)
            && !StringUtil.endsWith(result, fragmentSuffix)
            && !s.equals(result);

    return truncated ? result + fragmentSuffix : result;
  } catch (InvalidTokenOffsetsException itoe) {
    // Surface offset problems through the declared IOException contract.
    throw new IOException(itoe);
  }
}
/** * 设置关键字高亮 * * @param query 查询对象 * @param list 设置高亮的内容列表 * @param subLength 截取长度 * @param fields 字段名 */ public List<T> keywordsHighlight( BooleanQuery query, List<T> list, int subLength, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(subLength)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); // 设置高亮字段 String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2)); } } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
/**
 * Builds a {@link org.apache.lucene.search.highlight.Highlighter} configured for this field
 * from the request's parameters (formatter, encoder, scorer and fragmenter are all
 * resolved per field).
 *
 * @param query the current Query
 * @param fieldName the name of the field
 * @param request the current SolrQueryRequest
 * @return the configured highlighter
 */
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
  SolrParams params = request.getParams();
  Highlighter fieldHighlighter =
      new Highlighter(
          getFormatter(fieldName, params),
          getEncoder(fieldName, params),
          getQueryScorer(query, fieldName, request));
  fieldHighlighter.setTextFragmenter(getFragmenter(fieldName, params));
  return fieldHighlighter;
}
/**
 * Creates a highlighter that wraps matched query terms with the given markup.
 *
 * @param query the query whose terms will be highlighted
 * @param prefix opening markup; null or blank falls back to {@code <font color="red">}
 * @param stuffix closing markup; null or blank falls back to {@code </font>}
 * @param fragmenterLength maximum fragment length; non-positive falls back to 50
 * @return the configured highlighter
 */
public static Highlighter createHighlighter(
    Query query, String prefix, String stuffix, int fragmenterLength) {
  String openTag =
      (prefix == null || prefix.trim().length() == 0) ? "<font color=\"red\">" : prefix;
  String closeTag =
      (stuffix == null || stuffix.trim().length() == 0) ? "</font>" : stuffix;
  Highlighter result =
      new Highlighter(new SimpleHTMLFormatter(openTag, closeTag), new QueryScorer(query));
  int effectiveLength = fragmenterLength <= 0 ? 50 : fragmenterLength;
  result.setTextFragmenter(new SimpleFragmenter(effectiveLength));
  return result;
}
/**
 * Returns a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for
 * this field, scoring with span queries over the cached token stream.
 *
 * @param query the current Query
 * @param fieldName the name of the field
 * @param request the current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @return the configured phrase highlighter
 * @throws IOException if building the span query scorer fails
 */
protected Highlighter getPhraseHighlighter(
    Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream)
    throws IOException {
  SolrParams params = request.getParams();
  // Declare-and-assign directly; the previous `= null` initialization was dead code
  // immediately overwritten by the constructor call.
  Highlighter highlighter =
      new Highlighter(
          getFormatter(fieldName, params),
          getEncoder(fieldName, params),
          getSpanQueryScorer(query, fieldName, tokenStream, request));
  highlighter.setTextFragmenter(getFragmenter(fieldName, params));
  return highlighter;
}
public void heighlight(String field, String searchText) { String text = "In this section we'll show you how to make the simplest " + "programmatic query, searching for a single term, and then " + "we'll see how to use QueryParser to accept textual queries. " + "In the sections that follow, we’ll take this simple example " + "further by detailing all the query types built into Lucene. " + "We begin with the simplest search of all: searching for all " + "documents that contain a single term."; Analyzer analyzer = new StandardAnalyzer(); QueryParser queryParser = new QueryParser(field, analyzer); try { Query query = queryParser.parse(searchText); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); TokenStream tokens = analyzer.tokenStream("f", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "f"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); String result = highlighter.getBestFragments(tokens, text, 3, "..."); FileWriter writer = new FileWriter("/home/venugopal/Downloads/Lucene/lia/highter.html"); // #8 writer.write("<html>"); // #8 writer.write( "<style>\n" + // #8 ".highlight {\n" + // #8 " background: yellow;\n" + // #8 "}\n" + // #8 "</style>"); // #8 writer.write("<body>"); // #8 writer.write(result); // #8 writer.write("</body></html>"); // #8 writer.close(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/** * @param query 索引查询对象 * @param prefix 高亮前缀 * @param suffix 高亮后缀 * @param fragmenterLength 摘要最大长度 * @param ifCustom 是否自定义Highlighter * @return * @author huangzhiqian * @date 2015年12月11日 */ public static Highlighter createHighlighter( Query query, String prefix, String suffix, int fragmenterLength, boolean ifCustom) { Formatter formatter = new SimpleHTMLFormatter( (prefix == null || prefix.trim().length() == 0) ? "<font color=\"blue\">" : prefix, (suffix == null || suffix.trim().length() == 0) ? "</font>" : suffix); Scorer fragmentScorer = new QueryScorer(query); Highlighter highlighter = null; if (ifCustom) { highlighter = new CusHzqHighlighter(formatter, fragmentScorer); } else { highlighter = new Highlighter(formatter, fragmentScorer); } Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50 : fragmenterLength); highlighter.setTextFragmenter(fragmenter); return highlighter; }
/**
 * Applies query-term highlighting to {@code text}, wrapping each match between
 * {@code openMark} and {@code closeMark}. The full text is kept intact (no fragmenting).
 *
 * @param query the query whose terms are highlighted
 * @param fieldName the field to highlight; an empty string falls back to the default field
 * @param text the text to highlight
 * @param openMark markup inserted before each match
 * @param closeMark markup inserted after each match
 * @return the highlighted text, or the original text when nothing matched or an error occurred
 */
private String doHighlightQuery(
    Query query, String fieldName, String text, String openMark, String closeMark) {
  try {
    Highlighter highlighter =
        new Highlighter(
            new SimpleHTMLFormatter(openMark, closeMark),
            new QueryScorer(query, fieldName, this.env.defaultField));
    // NullFragmenter: highlight in place without splitting the text into fragments.
    highlighter.setTextFragmenter(new NullFragmenter());
    String effectiveField = "".equals(fieldName) ? this.env.defaultField : fieldName;
    String highlighted = highlighter.getBestFragment(this.env.indexAnalyzer, effectiveField, text);
    if (highlighted != null) {
      return highlighted;
    }
  } catch (Exception x) {
    logger.error("Caught an exception:", x);
  }
  // No match, or highlighting failed: hand back the unmodified input.
  return text;
}
/** * @param key 搜索内容所在的字段名称 * @param value 所要搜索的内容 * @throws CorruptIndexException * @throws IOException * @throws ParseException */ public static void search(String key, String value) throws CorruptIndexException, IOException, ParseException { IndexSearcher searcher; // 创建QueryParser对象,第一个参数表示Lucene的版本,第二个表示搜索Field的字段,第三个表示搜索使用分词器 // Analyzer analyzer = new IKAnalyzer(); //new StandardAnalyzer(Version.LUCENE_36) // QueryParser qp = new QueryParser(Version.LUCENE_36, key,analyzer); searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File(PATH)))); searcher.setSimilarity(new IKSimilarity()); Query query = IKQueryParser.parse(key, value); // Query tq = qp.parse(value); TopDocs topDocs = searcher.search(query, 10000); // searcher.search(query, results); // new function ScoreDoc[] scoreDocs = topDocs.scoreDocs; System.out.println("命中:" + topDocs.totalHits); Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Scorer fragmentScorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, fragmentScorer); Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); Analyzer analyzer = new IKAnalyzer(); for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int docSn = scoreDoc.doc; // 文档内部编号 Document doc = searcher.doc(docSn); // 根据文档编号取出文档 hightlightFeild(highlighter, analyzer, doc, "name"); hightlightFeild(highlighter, analyzer, doc, "address"); hightlightFeild(highlighter, analyzer, doc, "datatype"); // scoreDocs[i]. System.out.println("name:" + doc.get("name")); // new function System.out.println("address:" + doc.get("address")); System.out.println("datatype:" + doc.get("datatype")); System.out.println("geom:" + doc.get("geom")); } }
/**
 * Full-text search across the given entity classes, returning highlighted results.
 *
 * @param f field names to search in
 * @param luquery the Lucene query string to search with
 * @param hgQuery optional separate query string used only for highlighting; when null or
 *     blank, the search query itself is used for highlight scoring
 * @param sortFields optional sort order; null or empty means relevance order
 * @param firstResult first result index for paging; a negative value disables paging
 * @param maxResults page size (applied only when {@code firstResult >= 0})
 * @param entities the entity classes to search
 * @return one {@code SearchResultWH} per hit, in result order
 * @throws Exception if query parsing or the search fails
 */
@Transactional(propagation = Propagation.REQUIRED)
public List<SearchResultWH> searchWH(
    String[] f,
    String luquery,
    String hgQuery,
    SortField[] sortFields,
    int firstResult,
    int maxResults,
    Class<?>... entities)
    throws Exception {
  // Create the FullTextEntityManager from the injected JPA entity manager.
  FullTextEntityManager fullTextEntityManager =
      org.hibernate.search.jpa.Search.getFullTextEntityManager(entityManager);
  StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
  // Parse the user query across all requested fields.
  QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_31, f, analyzer);
  org.apache.lucene.search.Query query = parser.parse(luquery.trim());
  System.out.println("QUERY: " + query + " entitys size:" + entities.length);
  // Wrap the Lucene query in a JPA full-text query restricted to the given entity types.
  org.hibernate.search.jpa.FullTextQuery persistenceQuery =
      fullTextEntityManager.createFullTextQuery(query, entities);
  if (sortFields != null && sortFields.length > 0) {
    persistenceQuery.setSort(new Sort(sortFields));
    System.out.println("Sort setted");
  }
  // Paging is only applied when a non-negative first result index is supplied.
  if (firstResult >= 0) {
    persistenceQuery.setFirstResult(firstResult);
    persistenceQuery.setMaxResults(maxResults);
  }
  // Execute the search.
  @SuppressWarnings("unchecked")
  List<Object> result = persistenceQuery.getResultList();
  // Use the dedicated highlight query when one is supplied; otherwise score
  // highlights with the search query itself.
  QueryScorer qs = null;
  if (hgQuery != null && hgQuery.trim().length() > 0) {
    qs = new QueryScorer(parser.parse(hgQuery.trim()));
  } else {
    qs = new QueryScorer(query);
  }
  // Anonymous formatter: wrap tokens that contributed to the score in red/bold
  // markup and leave all other text untouched.
  Highlighter highlighter =
      new Highlighter(
          new Formatter() {
            String preTag = "<font color=\"red\"><b>";
            String postTag = "</b></font>";

            @Override
            public String highlightTerm(String originalText, TokenGroup tokenGroup) {
              StringBuffer returnBuffer;
              if (tokenGroup.getTotalScore() > 0) {
                returnBuffer = new StringBuffer();
                returnBuffer.append(preTag);
                returnBuffer.append(originalText);
                returnBuffer.append(postTag);
                return returnBuffer.toString();
              }
              return originalText;
            }
          },
          qs);
  highlighter.setTextFragmenter(new SimpleFragmenter(150));
  // Convert each hit to a SearchResultWH via the type-specific makeHW overloads.
  List<SearchResultWH> listWH = new ArrayList<SearchResultWH>();
  for (Object o : result) {
    if (o instanceof Hotel) {
      Hotel h = (Hotel) o;
      h.getResortTypes().size(); // Force-initialize the LAZY collection inside the transaction.
      listWH.add(makeHW(highlighter, analyzer, h));
    } else if (o instanceof ResortType) {
      listWH.add(makeHW(highlighter, analyzer, (ResortType) o));
    } else if (o instanceof PublicImage) {
      listWH.add(makeHW(highlighter, analyzer, (PublicImage) o));
    } else if (o instanceof FileResourse) {
      listWH.add(makeHW(highlighter, analyzer, (FileResourse) o));
    } else if (o instanceof News) {
      listWH.add(makeHW(highlighter, analyzer, (News) o));
    } else if (o instanceof Announce) {
      listWH.add(makeHW(highlighter, analyzer, (Announce) o));
    } else if (o instanceof Location) {
      listWH.add(makeHW(highlighter, analyzer, (Location) o));
    } else if (o instanceof LType) {
      listWH.add(makeHW(highlighter, analyzer, (LType) o));
    } else if (o instanceof AnyText) {
      listWH.add(makeHW(highlighter, analyzer, (AnyText) o));
    } else if (o instanceof Visa) {
      listWH.add(makeHW(highlighter, analyzer, (Visa) o));
    }
  }
  return listWH;
}
/**
 * Builds a short text abstract for {@code content}, preferring the passage that best
 * matches {@code title}, then trimming it to start and end on sentence-like boundaries.
 *
 * <p>Phase 1: strip HTML and ask the highlighter (empty markup) for the best 200-char
 * fragment relative to the title. Phase 2: scan that fragment character by character,
 * skipping leading punctuation/whitespace and remembering the last sentence-ending
 * position (handles both ASCII and fullwidth CJK punctuation). Phase 3: re-trim the
 * chosen substring so it neither starts nor ends mid-clause.
 *
 * @param title text whose terms select the most relevant passage
 * @param content the (possibly HTML) source text
 * @return the trimmed abstract, or the processed content unchanged if no boundary was found
 */
@SuppressWarnings("resource")
public static String getTextAbstract(String title, String content) {
  try {
    content = getTextFromHtml(content);
    // Empty pre/post markup: the highlighter is used only to SELECT the best
    // fragment, not to insert any tags.
    org.apache.lucene.search.Query q = IKQueryParser.parse("CONTENT", title);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("", "");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(q));
    highlighter.setTextFragmenter(new SimpleFragmenter(200));
    org.apache.lucene.analysis.TokenStream tokenStream =
        (new IKAnalyzer()).tokenStream("CONTENT", new StringReader(content));
    String tmp = highlighter.getBestFragment(tokenStream, content);
    if (HtmlStringUtil.isNotEmpty(tmp)) content = tmp.trim();
  } catch (Exception e) {
    // Best-effort: on any failure fall through and trim the un-highlighted content.
    e.printStackTrace();
  }
  // start/end delimit the abstract; startFlag is true while still skipping the
  // leading run of punctuation and whitespace.
  int start = 0;
  int end = 0;
  boolean startFlag = true;
  for (int i = 0; i < content.length(); i++) {
    char c = content.charAt(i);
    if (startFlag) {
      // Skip leading whitespace, control chars and ASCII/fullwidth punctuation.
      if (Character.isWhitespace(c)
          || Character.isISOControl(c)
          || c == ',' || c == ',' || c == '”' || c == '’' || c == '.' || c == '。'
          || c == '>' || c == '?' || c == '?' || c == ' ' || c == ' ' || c == ' '
          || c == '!' || c == '!' || c == ';' || c == ';' || c == ':' || c == ':'
          || c == ']' || c == ']')
        continue;
      start = i;
      startFlag = false;
    }
    if (!startFlag)
      // Sentence-ending characters: remember the latest one as the abstract's end.
      if (c == '.' || c == '。' || c == '?' || c == '?' || c == '!' || c == '!'
          || c == ' ' || c == ' ' || c == ' ') {
        if (i < 8) start = i + 1; // a terminator in the first few chars: restart after it
        end = i;
        // Include a trailing closing quote in the abstract.
        if (i != content.length() - 1
            && (content.charAt(i + 1) == '”' || content.charAt(i + 1) == '’'))
          end = i + 1;
      } else {
        // A clause separator very near the beginning: restart just after it.
        if ((c == ',' || c == ',' || c == '>' || c == '》' || c == '、') && i < 2)
          start = i + 1;
        if (c == '’' || c == '”')
          if (i != content.length() - 1) {
            char next = content.charAt(i + 1);
            // NOTE(review): this condition mixes != with a chain of && == tests and can
            // never be true (next cannot equal several different characters at once);
            // likely the author intended != throughout. Preserved as-is — confirm intent.
            if (next != ',' && next == ',' && next == '、' && next == ';' && next == ';')
              end = i + 1;
          } else {
            end = i;
          }
      }
  }
  if (end != 0 && end > start) {
    content = content.substring(start, end + 1).trim();
    // Second pass: if a sentence terminator still sits within the first 8 chars,
    // drop everything up to and including it.
    start = 0;
    for (int i = 0; i < content.length(); i++) {
      char c = content.charAt(i);
      if ((c == '.' || c == '。' || c == '?' || c == '?' || c == '!'
          || c == '!' || c == ' ' || c == ' ' || c == ' ') && i < 8)
        start = i + 1;
    }
    if (start != 0) content = content.substring(start);
    end = 0;
    if (HtmlStringUtil.isNotEmpty(content)) {
      // If the abstract does not end on a sentence terminator, cut back to the
      // last clause separator instead.
      char c = content.charAt(content.length() - 1);
      if (c != '.' && c != '。' && c != '?' && c != '?' && c != '!' && c != '!') {
        for (int i = content.length() - 2; i > 0; i--) {
          c = content.charAt(i);
          if (c != ';' && c != ';' && c != ',' && c != ',' && c != '>' && c != '》')
            continue;
          end = i;
          break;
        }
      }
    }
    if (end != 0) content = content.substring(0, end);
  }
  return content;
}
/** * @param currentPageNum * @return * @throws Exception */ public List<DocumentEntity> getResult(int currentPageNum) throws Exception { List<DocumentEntity> list = new ArrayList<DocumentEntity>(); directory = FSDirectory.open(new File(SystemConstant.indexDir)); reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); // 高亮显示设置 Highlighter highlighter = null; SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'><b>", "</b></font>"); Highlighter highlighterTitle = null; SimpleHTMLFormatter formatTitle = new SimpleHTMLFormatter("<FONT color=#c60a00>", "</FONT>"); ScoreDoc[] hits = this.getScoreDocs(); Query query = this.getQuery(); highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterTitle = new Highlighter(formatTitle, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(200)); // 这个200是指定关键字字符串的context // 的长度,你可以自己设定,因为不可能返回整篇正文内容 Document doc; String fileName = ""; int totalNumber = currentPageNum * eachePageNum; if (totalNumber > hits.length) totalNumber = hits.length; for (int i = (currentPageNum - 1) * eachePageNum; i < totalNumber; i++) { // 打印文档的内容 doc = searcher.doc(hits[i].doc); // System.out.println(doc.toString()); // if(this.docType.equals(doc.get("type"))){ // 高亮出显示 DocumentEntity docEntity = new DocumentEntity(); TokenStream tokenStream = new PaodingAnalyzer().tokenStream("contents", new StringReader(doc.get("contents"))); docEntity.setContents(highlighter.getBestFragment(tokenStream, doc.get("contents"))); // System.out.println("----------"+i+"----------"); // System.out.println(docEntity.getContents()); fileName = doc.get("fileName"); tokenStream = new PaodingAnalyzer().tokenStream("fileName", new StringReader(fileName)); // 需要注意:在处理时如果文本检索结果中不包含对应的关键字返回一个null String forMatt = highlighterTitle.getBestFragment(tokenStream, fileName); if (forMatt == null) docEntity.setFilename(fileName); else docEntity.setFilename(forMatt); 
String type1 = doc.get("type"); docEntity.setType(type1); docEntity.setId(doc.get("id")); if ("pdf".equalsIgnoreCase(type1)) { fileName = SystemConstant.CONTEXT + SystemConstant.PDFdir + fileName + "." + type1; docEntity.setOriginalFileName(fileName); } else if ("doc".equalsIgnoreCase(type1)) { fileName = SystemConstant.CONTEXT + SystemConstant.Docdir + fileName + "." + type1; docEntity.setOriginalFileName(fileName); } // System.out.println(docEntity.getType()); list.add(docEntity); // System.out.println(docEntity.getFilename()); // System.out.println("--------------------"+doc.get("fileName")); // }//end for if // else continue; } // end for return list; }