/**
 * Announcements adapter.
 *
 * @param hg highlighter used to extract the best fragments
 * @param a analyzer used to tokenize the indexed fields
 * @param ann announcement to build a highlighted search result for
 * @return the highlighted search result wrapper
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, Announce ann) throws Exception {
    String s = "";
    {
        String text = ann.getITopDescription() + "";
        TokenStream tokenStream = a.tokenStream("topdescription", new StringReader(text));
        // "Совпадения в заголовке объявления" = "Matches in the announcement title"
        s += cP(
            "Совпадения в заголовке объявления",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }
    {
        String text = ann.getIDescription() + "";
        TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
        // "Совпадения в тексте объявления" = "Matches in the announcement text"
        s += cP(
            "Совпадения в тексте объявления",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }
    String metatexts = "";
    {
        // "Совпадения в ..." = "Matches in ..." (keywords / description / subject)
        String text = ann.getMeta_keywords() + "";
        TokenStream tokenStream = a.tokenStream("meta_keywords", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в keywords",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = ann.getMeta_description() + "";
        tokenStream = a.tokenStream("meta_description", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в description",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = ann.getMeta_subject() + "";
        tokenStream = a.tokenStream("meta_subject", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в subject",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }
    return new SearchResultWH(ann, "Announce", s, metatexts);
}
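The adapter above (and the Visa, PublicImage, and AnyText adapters below) repeats the same two-line tokenize-then-highlight step for every field. A minimal sketch of a shared helper, assuming the same Highlighter/Analyzer pair and the Analyzer.tokenStream(String, Reader) signature used above; the helper name bestFragments is hypothetical:

// Hypothetical helper: tokenize one field's text and return the highlighted
// fragments, using the same "... " separator and fragment limit as the adapters.
private String bestFragments(Highlighter hg, Analyzer a, String field, String text)
        throws IOException, InvalidTokenOffsetsException {
    TokenStream tokenStream = a.tokenStream(field, new StringReader(text));
    return hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... ");
}

Each block then collapses to a single call, e.g. s += cP("Совпадения в тексте объявления", bestFragments(hg, a, "description", text));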
@Override
public String getSnippet(
        Query query, String field, String s, int maxNumFragments, int fragmentLength,
        String fragmentSuffix, Formatter formatter)
    throws IOException {

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {
        String snippet = highlighter.getBestFragments(
            tokenStream, s, maxNumFragments, fragmentSuffix);

        // Append the suffix when the snippet was cut short but does not
        // already end with it.
        if (Validator.isNotNull(snippet) &&
            !StringUtil.endsWith(snippet, fragmentSuffix) &&
            !s.equals(snippet)) {

            snippet = snippet.concat(fragmentSuffix);
        }

        return snippet;
    }
    catch (InvalidTokenOffsetsException itoe) {
        throw new IOException(itoe);
    }
}
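A call site for getSnippet might look like the following sketch. The field name "content", the query text, the 3-fragment/80-character limits, and the documentText variable are assumptions; QueryParser.parse throws ParseException, and older Lucene releases also require a Version argument in the QueryParser constructor. SimpleHTMLFormatter's no-arg constructor wraps matches in <B>...</B> tags.

// Hypothetical usage: parse the user's query, then cut a suffixed snippet
// of up to 3 fragments of roughly 80 characters each.
Query query = new QueryParser("content", getAnalyzer()).parse("lucene highlighter");
String snippet = getSnippet(
    query, "content", documentText, 3, 80, "...", new SimpleHTMLFormatter());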
/**
 * Visa adapter.
 *
 * @param hg highlighter used to extract the best fragments
 * @param a analyzer used to tokenize the indexed fields
 * @param v visa to build a highlighted search result for
 * @return the highlighted search result wrapper
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, Visa v) throws Exception {
    String s = "";
    String text = v.getIDescription() + "";
    TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
    // "Совпадения в описании" = "Matches in the description"
    s += cP(
        "Совпадения в описании",
        hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

    String metatexts = "";
    {
        // "Совпадения в ..." = "Matches in ..." (keywords / description / subject)
        text = v.getMeta_keywords() + "";
        tokenStream = a.tokenStream("meta_keywords", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в keywords",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = v.getMeta_description() + "";
        tokenStream = a.tokenStream("meta_description", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в description",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = v.getMeta_subject() + "";
        tokenStream = a.tokenStream("meta_subject", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в subject",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }
    return new SearchResultWH(v, "Visa", s, metatexts);
}
/**
 * Public image adapter.
 *
 * @param hg highlighter used to extract the best fragments
 * @param a analyzer used to tokenize the indexed field
 * @param pi public image to build a highlighted search result for
 * @return the highlighted search result wrapper
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, PublicImage pi) throws Exception {
    String text = pi.getDescription() + "";
    TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
    // "Совпадения в описании" = "Matches in the description"
    String s = cP(
        "Совпадения в описании",
        hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    return new SearchResultWH(pi, "PublicImage", s);
}
/**
 * Any text adapter.
 *
 * @param hg highlighter used to extract the best fragments
 * @param a analyzer used to tokenize the indexed field
 * @param t text to build a highlighted search result for
 * @return the highlighted search result wrapper
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, AnyText t) throws Exception {
    String s = "";
    String text = t.getIDescription() + "";
    TokenStream tokenStream = a.tokenStream("anytext", new StringReader(text));
    // "Совпадения в тексте" = "Matches in the text"
    s += cP(
        "Совпадения в тексте",
        hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    return new SearchResultWH(t, "AnyText", s);
}
protected String retrieveResultSummary(
        Document document, Highlighter highlighter, StandardAnalyzer analyzer)
    throws InvalidTokenOffsetsException, IOException {

    String content = document.get(FIELD_TOPIC_CONTENT);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_TOPIC_CONTENT, new StringReader(content));
    String summary = highlighter.getBestFragments(tokenStream, content, 3, "...");

    // Fall back to an escaped excerpt of the first 200 characters when the
    // highlighter finds no fragments, appending "..." only if the content was truncated.
    if (StringUtils.isBlank(summary) && !StringUtils.isBlank(content)) {
        summary = StringEscapeUtils.escapeHtml(content.substring(0, Math.min(200, content.length())));
        if (Math.min(200, content.length()) == 200) {
            summary += "...";
        }
    }
    return summary;
}
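The highlighter handed to retrieveResultSummary is typically built from the parsed query before the call. A minimal sketch, assuming a SimpleHTMLFormatter, a QueryScorer over the same FIELD_TOPIC_CONTENT field, and an existing query and document; note that StandardAnalyzer's constructor takes a Version argument in older Lucene releases.

// Hypothetical construction of the highlighter passed to retrieveResultSummary.
QueryScorer scorer = new QueryScorer(query, FIELD_TOPIC_CONTENT);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
String summary = retrieveResultSummary(document, highlighter, new StandardAnalyzer());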
public void highlight(String field, String searchText) {
    String text =
        "In this section we'll show you how to make the simplest "
            + "programmatic query, searching for a single term, and then "
            + "we'll see how to use QueryParser to accept textual queries. "
            + "In the sections that follow, we'll take this simple example "
            + "further by detailing all the query types built into Lucene. "
            + "We begin with the simplest search of all: searching for all "
            + "documents that contain a single term.";

    Analyzer analyzer = new StandardAnalyzer();
    QueryParser queryParser = new QueryParser(field, analyzer);
    try {
        Query query = queryParser.parse(searchText);

        SimpleHTMLFormatter formatter =
            new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");

        // Tokenize and score against the same field the query was parsed for;
        // the original hard-coded "f" here, which breaks highlighting whenever
        // a different field is passed in.
        TokenStream tokens = analyzer.tokenStream(field, new StringReader(text));

        QueryScorer scorer = new QueryScorer(query, field);

        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

        String result = highlighter.getBestFragments(tokens, text, 3, "...");

        // Write the highlighted fragments into a small standalone HTML page.
        try (FileWriter writer =
                new FileWriter("/home/venugopal/Downloads/Lucene/lia/highter.html")) {
            writer.write("<html>");
            writer.write(
                "<style>\n"
                    + ".highlight {\n"
                    + " background: yellow;\n"
                    + "}\n"
                    + "</style>");
            writer.write("<body>");
            writer.write(result);
            writer.write("</body></html>");
        }
    }
    catch (ParseException | IOException | InvalidTokenOffsetsException e) {
        e.printStackTrace();
    }
}
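Driving the method above is a one-liner; the class name HighlightDemo is hypothetical, and any query term that occurs in the embedded sample text (for example "term") gets wrapped in the highlight span.

// Hypothetical usage: highlights every occurrence of "term" in the sample text
// and writes the result page to the hard-coded path above.
new HighlightDemo().highlight("f", "term");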
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags
 * @return A Collection of SearchResult instances
 * @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = {
            LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS
        };
        QueryParser qp =
            new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());
        // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );

        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                new SimpleHTMLEncoder(),
                new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        }
        catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page.
                    // When the search results are cleaned up this can be enabled again.
                }

                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts.
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream =
                        getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            }
            else {
                log.error("Lucene found a result page '" + pageName
                    + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    }
    catch (IOException e) {
        log.error("Failed during lucene search", e);
    }
    catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);
        throw new ProviderException(
            "You have entered a query Lucene cannot process: " + e.getMessage());
    }
    catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    }
    finally {
        if (searcher != null) {
            try {
                searcher.close();
            }
            catch (IOException e) {
                log.error(e);
            }
        }
    }

    return list;
}