/**
 * Smoke test: runs the highlighter over four hard-coded hit keys for the
 * query "miniseries" against the index "wikilucene.nspart1.sub1".
 *
 * <p>The query is parsed with a {@code WikiQueryParser}, its terms are
 * extracted, and only terms whose field is "contents" are kept. Document
 * frequencies for those terms are read from the local searcher and everything
 * is handed to {@code Highlight.highlight}. The highlight result is not
 * inspected or returned.
 *
 * @throws Exception if any of the parsing, searching or highlighting calls fail
 */
public static void testTokenizer() throws Exception {
  // Hit keys to highlight.
  ArrayList<String> hitKeys = new ArrayList<String>();
  hitKeys.add("0:Douglas Adams");
  hitKeys.add("0:The Hitchhiker's Guide to the Galaxy");
  hitKeys.add("0:2001");
  hitKeys.add("0:Boot");

  IndexId indexId = IndexId.get("wikilucene.nspart1.sub1");
  FieldBuilder.BuilderSet builderSet = new FieldBuilder(indexId).getBuilder();
  Analyzer searchAnalyzer = Analyzers.getSearcherAnalyzer(indexId);
  ArrayList<String> cachedStopWords = StopWords.getCached(indexId);

  WikiQueryParser queryParser =
      new WikiQueryParser(
          builderSet.getFields().contents(),
          new NamespaceFilter(0),
          searchAnalyzer,
          builderSet,
          NamespacePolicy.IGNORE,
          cachedStopWords);
  Query parsed = queryParser.parse("miniseries");

  // Collect the query's terms, then drop every term outside the "contents" field.
  HashSet<Term> contentTerms = new HashSet<Term>();
  parsed.extractTerms(contentTerms);
  for (Iterator<Term> cursor = contentTerms.iterator(); cursor.hasNext(); ) {
    Term candidate = cursor.next();
    if (candidate.field().equals("contents")) {
      continue;
    }
    cursor.remove();
  }
  Term[] termArray = contentTerms.toArray(new Term[0]);

  IndexSearcher localSearcher = SearcherCache.getInstance().getLocalSearcher(indexId);
  int[] docFreqs = localSearcher.docFreqs(termArray);

  Highlight.highlight(
      hitKeys,
      indexId,
      termArray,
      docFreqs,
      localSearcher.maxDoc(),
      queryParser.getWordsClean(),
      StopWords.getPredefinedSet(indexId),
      false,
      null,
      false,
      false);
}
/**
 * Times {@code Highlight.highlight} over batches of randomly picked documents
 * and prints progress and a final summary to standard output.
 *
 * <p>Runs 10000 iterations. Each iteration picks 10 random document ids in
 * {@code [0, maxDoc)} from the current search index of {@code dbnameSrc},
 * builds hit keys of the form {@code namespace:title} from the stored fields,
 * and highlights them using the highlight index of {@code dbname} with a fixed
 * set of terms, words and document frequencies. The number of highlighted
 * entries returned is accumulated and reported.
 *
 * <p>The index reader opened here is always closed before the method returns,
 * including when an exception is thrown.
 *
 * @param dbname name of the index whose highlight part ({@code getHighlight()}) is queried
 * @param dbnameSrc name of the index whose current search snapshot supplies the random documents
 * @throws Exception if opening or reading the index, or highlighting, fails
 */
public static void timeTest(String dbname, String dbnameSrc) throws Exception {
  IndexId src = IndexId.get(dbnameSrc);
  IndexId iid = IndexId.get(dbname).getHighlight();
  IndexReader reader = IndexReader.open(IndexRegistry.getInstance().getCurrentSearch(src).path);
  try {
    int maxDoc = reader.maxDoc();

    ArrayList<String> words = new ArrayList<String>();
    words.add("in");
    words.add("the");
    words.add("some");
    words.add("book");

    // Fixed query terms on the "contents" field, with made-up document frequencies.
    Term[] terms = {
      new Term("contents", "in"),
      new Term("contents", "the"),
      new Term("contents", "some"),
      new Term("contents", "som"),
      new Term("contents", "book")
    };
    int[] df = {1000, 4000, 200, 500, 100};

    HashSet<String> stopWords = StopWords.getPredefinedSet(src);

    int count = 0;
    int total = 10000;
    long start = System.currentTimeMillis();
    for (int i = 0; i < total; i++) {
      // Build one batch of 10 random hit keys.
      ArrayList<String> hits = new ArrayList<String>();
      for (int j = 0; j < 10; j++) {
        int docid = (int) (Math.random() * maxDoc);
        Document doc = reader.document(docid);
        hits.add(doc.get("namespace") + ":" + doc.get("title"));
      }

      Highlight.ResultSet rs =
          Highlight.highlight(
              hits, iid, terms, df, maxDoc, words, stopWords, false, null, false, false);
      HashMap<String, HighlightResult> res = rs.highlighted;
      count += res.size();

      // Progress line every 200 iterations (iteration 0 is skipped).
      // NOTE: count / 10 is integer division, so the divisor is truncated; while
      // count < 10 it is 0 and the printed figure is Infinity or NaN.
      if (i != 0 && i % 200 == 0) {
        long delta = System.currentTimeMillis() - start;
        System.out.println(
            "["
                + formatTime(delta)
                + "] "
                + ((float) delta / (count / 10))
                + " ms / 10 articles, found "
                + count
                + "/"
                + (total * 10));
      }
    }

    long delta = System.currentTimeMillis() - start;
    System.out.println(
        "Elapsed "
            + formatTime(delta)
            + ", "
            + ((float) delta / (count / 10))
            + " ms / 10 articles, found "
            + count
            + "/"
            + (total * 10));
  } finally {
    // Release the reader's underlying index files even if the benchmark fails midway.
    reader.close();
  }
}