/**
 * Checks the fields of the document built by
 * {@code WikiIndexModifier.makeTitleDocument} for a small fixture article.
 *
 * <p>The fixture is an {@code Article} titled "Test page" carrying one redirect,
 * one related title and one anchor, resolved against the "en-titles" index id.
 * The assertions pin the expected content of the {@code alttitle},
 * {@code pageid}, {@code key}, {@code suffix}, {@code dbname},
 * {@code namespace} and {@code title} fields.
 *
 * <p>NOTE(review): {@code tokens(...)} and {@code value(...)} are helpers defined
 * outside this method; presumably they read from the {@code doc} member assigned
 * here -- confirm against their definitions.
 */
public void testMakeTitleDocument() {
    IndexId iid = IndexId.get("en-titles");
    String text =
            "Some very simple text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
    int references = 100;
    int redirectTargetNamespace = -1;

    ArrayList<Redirect> redirects = new ArrayList<Redirect>();
    redirects.add(new Redirect(0, "Redirect", 2));

    ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
    rel.add(new RelatedTitle(new Title(0, "Related test"), 50));

    Hashtable<String, Integer> anchors = new Hashtable<String, Integer>();
    anchors.put("Anchor", 20);

    Date date = new Date();
    Article article =
            new Article(
                    10,
                    0,
                    "Test page",
                    text,
                    null,
                    references,
                    redirectTargetNamespace,
                    0,
                    redirects,
                    rel,
                    anchors,
                    date);

    analyzer = Analyzers.getIndexerAnalyzer(new FieldBuilder(iid));
    highlightAnalyzer = Analyzers.getHighlightAnalyzer(iid, false);
    try {
        // The "wiki" and "enwiki" arguments are what the suffix/dbname assertions
        // below expect to find in the resulting document.
        doc =
                WikiIndexModifier.makeTitleDocument(
                        article,
                        analyzer,
                        highlightAnalyzer,
                        iid,
                        "wiki",
                        "enwiki",
                        false,
                        new HashSet<String>());
        assertEquals("1 [test] 1 [page] 255 [redirect]", tokens("alttitle"));
        assertEquals("wiki:10", value("pageid"));
        assertEquals("1 [wiki:0:Test page]", tokens("key"));
        assertEquals("wiki", value("suffix"));
        assertEquals("1 [enwiki]", tokens("dbname"));
        assertEquals("0", value("namespace"));
        assertEquals("Test page", value("title"));
    } catch (IOException e) {
        // Report what went wrong instead of failing with no diagnostic.
        fail("Unexpected IOException from makeTitleDocument: " + e);
    }
}
/**
 * Exercises {@code WikiIndexModifier.makeDocument} with a spell-check
 * {@code FieldBuilder} and the stop words "of" and "for".
 *
 * <p>Verifies the token streams expected for the {@code contents} and
 * {@code title} fields, and the collection decoded from the
 * {@code spellcheck_context} value.
 *
 * <p>NOTE(review): {@code tokens(...)} and {@code value(...)} are defined outside
 * this method; presumably they inspect the {@code doc} member set here -- confirm.
 */
public void testSpellcheck() {
    IndexId indexId = IndexId.get("enwiki");
    String wikiText =
            "Some very [[simple]] text used for testing, used for testing of something\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";

    // Plain JDK fixtures first; none of these depend on one another.
    HashSet<String> stopWordSet = new HashSet<String>();
    stopWordSet.add("of");
    stopWordSet.add("for");

    Hashtable<String, Integer> anchorRanks = new Hashtable<String, Integer>();
    anchorRanks.put("Anchor", 20);

    ArrayList<RelatedTitle> relatedTitles = new ArrayList<RelatedTitle>();
    relatedTitles.add(new RelatedTitle(new Title(0, "Related test"), 50));

    ArrayList<Redirect> redirectList = new ArrayList<Redirect>();
    redirectList.add(new Redirect(0, "Redirect", 2));

    Date timestamp = new Date();

    Article page =
            new Article(
                    10,
                    0,
                    "Test for page",
                    wikiText,
                    null,
                    100, // references
                    -1, // redirect target namespace
                    0,
                    redirectList,
                    relatedTitles,
                    anchorRanks,
                    timestamp);

    FieldBuilder spellBuilder =
            new FieldBuilder(
                    indexId,
                    FieldBuilder.Case.IGNORE_CASE,
                    FieldBuilder.Stemmer.NO_STEMMER,
                    FieldBuilder.Options.SPELL_CHECK);
    spellBuilder.getBuilder().getFilters().setStopWords(stopWordSet);
    analyzer = Analyzers.getSpellCheckAnalyzer(indexId, stopWordSet);

    try {
        doc = WikiIndexModifier.makeDocument(page, spellBuilder, indexId, stopWordSet, analyzer, true);

        String expectedContents =
                "1 [test] 1 [for] 1 [page] 1 [test_for_page] 1 [some] 1 [page_some] 1 [very] 1 [some_very] 1 [simple] 1 [very_simple] 1 [text] 1 [simple_text] 1 [used] 1 [text_used] 1 [testing] 1 [used_for_testing] 1 [testing_used] 1 [of] 1 [something] 1 [testing_of_something] 1 [heading] 1 [something_heading] 1 [1] 1 [heading_1] 1 [paragraph] 1 [1_paragraph] 1 [category1] 1 [paragraph_category1] 1 [category1_test] 1 [anchor] 1 [page_anchor] 1 [redirect] 1 [anchor_redirect]";
        assertEquals(expectedContents, tokens("contents"));

        String expectedTitle =
                "1 [test] 1 [for] 1 [test_for] 1 [page] 1 [for_page] 1 [test_for_page]";
        assertEquals(expectedTitle, tokens("title"));

        String contextWords =
                new StringList(value("spellcheck_context")).toCollection().toString();
        assertEquals("[1, page, for, test, simple, heading]", contextWords);
    } catch (IOException e) {
        fail(e.getMessage());
    }
}
/**
 * Checks the fields of the document built by the five-argument
 * {@code WikiIndexModifier.makeDocument} for a small fixture article.
 *
 * <p>The fixture is an {@code Article} titled "Test page" with one redirect,
 * one related title and one anchor, resolved against the "enwiki" index id. The
 * assertions pin the expected content of the {@code contents}, {@code title},
 * {@code alttitle}, {@code related}, {@code sections}, {@code category},
 * {@code key}, {@code rank}, {@code prefix} and {@code reverse_title} fields.
 *
 * <p>NOTE(review): {@code tokens(...)} and {@code value(...)} are helpers defined
 * outside this method; presumably they read from the {@code doc} member assigned
 * here -- confirm against their definitions.
 */
public void testMakeDocuments() {
    IndexId iid = IndexId.get("enwiki");
    String text =
            "Some very [[simple]] text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
    int references = 100;
    int redirectTargetNamespace = -1;

    ArrayList<Redirect> redirects = new ArrayList<Redirect>();
    redirects.add(new Redirect(0, "Redirect", 2));

    ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
    rel.add(new RelatedTitle(new Title(0, "Related test"), 50));

    Hashtable<String, Integer> anchors = new Hashtable<String, Integer>();
    anchors.put("Anchor", 20);

    Date date = new Date();
    Article article =
            new Article(
                    10,
                    0,
                    "Test page",
                    text,
                    null,
                    references,
                    redirectTargetNamespace,
                    0,
                    redirects,
                    rel,
                    anchors,
                    date);

    analyzer = Analyzers.getIndexerAnalyzer(new FieldBuilder(iid));
    try {
        // A fresh FieldBuilder is passed here, separate from the one handed to
        // the analyzer above.
        doc =
                WikiIndexModifier.makeDocument(
                        article, new FieldBuilder(iid), iid, new HashSet<String>(), analyzer);
        assertEquals(
                "1 [test] 1 [page] 3 [some] 1 [very] 1 [simple] 1 [text] 1 [used] 0 [use] 1 [for] 1 [testing] 0 [test] 500 [heading] 1 [1] 1 [paragraph] 1 [category1] 10 [test] 1 [page] 10 [anchor] 10 [redirect]",
                tokens("contents"));
        assertEquals("1 [test] 1 [page]", tokens("title"));
        assertEquals("1 [test] 1 [page] 255 [anchor] 256 [redirect]", tokens("alttitle"));
        assertEquals("1 [related] 1 [test]", tokens("related"));
        assertEquals("1 [heading] 1 [1]", tokens("sections"));
        assertEquals("1 [category1]", tokens("category"));
        assertEquals("10", value("key"));
        assertEquals("103", value("rank"));
        assertEquals("1 [0:test page]", tokens("prefix"));
        assertEquals("1 [egap] 1 [tset]", tokens("reverse_title"));
    } catch (IOException e) {
        // Report what went wrong instead of failing with no diagnostic.
        fail("Unexpected IOException from makeDocument: " + e);
    }
}
/**
 * Drives {@code Highlight.highlight} for a fixed list of four hit keys and the
 * query "miniseries" against the "wikilucene.nspart1.sub1" index id.
 *
 * <p>The query is parsed with a {@code WikiQueryParser}, its extracted terms are
 * narrowed to those whose field is "contents", and their document frequencies
 * are looked up through the local searcher before highlighting. The result of
 * the highlight call is not inspected.
 *
 * @throws Exception if any of the called components throws
 */
public static void testTokenizer() throws Exception {
    String[] hitKeys = {
        "0:Douglas Adams", "0:The Hitchhiker's Guide to the Galaxy", "0:2001", "0:Boot"
    };
    ArrayList<String> hits = new ArrayList<String>();
    for (String hitKey : hitKeys) {
        hits.add(hitKey);
    }

    IndexId indexId = IndexId.get("wikilucene.nspart1.sub1");
    FieldBuilder.BuilderSet builderSet = new FieldBuilder(indexId).getBuilder();
    Analyzer searchAnalyzer = Analyzers.getSearcherAnalyzer(indexId);
    ArrayList<String> cachedStopWords = StopWords.getCached(indexId);
    WikiQueryParser queryParser =
            new WikiQueryParser(
                    builderSet.getFields().contents(),
                    new NamespaceFilter(0),
                    searchAnalyzer,
                    builderSet,
                    NamespacePolicy.IGNORE,
                    cachedStopWords);
    Query parsed = queryParser.parse("miniseries");

    // Keep only the terms that belong to the "contents" field.
    HashSet<Term> contentTerms = new HashSet<Term>();
    parsed.extractTerms(contentTerms);
    for (Iterator<Term> cursor = contentTerms.iterator(); cursor.hasNext(); ) {
        Term candidate = cursor.next();
        if (candidate.field().equals("contents")) {
            continue;
        }
        cursor.remove();
    }
    Term[] termArray = contentTerms.toArray(new Term[0]);

    IndexSearcher localSearcher = SearcherCache.getInstance().getLocalSearcher(indexId);
    int[] docFreqs = localSearcher.docFreqs(termArray);
    Highlight.highlight(
            hits,
            indexId,
            termArray,
            docFreqs,
            localSearcher.maxDoc(),
            queryParser.getWordsClean(),
            StopWords.getPredefinedSet(indexId),
            false,
            null,
            false,
            false);
}
/**
 * Checks the fields of the document built by
 * {@code WikiIndexModifier.makeHighlightDocument} for a small fixture article.
 *
 * <p>The fixture is an {@code Article} titled "Test page" with one redirect,
 * one related title and one anchor, resolved against the "enwiki" index id. The
 * assertions pin the expected tokens of the {@code pageid} and {@code key}
 * fields.
 *
 * <p>NOTE(review): {@code tokens(...)} is a helper defined outside this method;
 * presumably it reads from the {@code doc} member and uses the {@code analyzer}
 * member assigned here -- confirm against its definition.
 */
public void testMakeHighlightDocuments() {
    IndexId iid = IndexId.get("enwiki");
    String text =
            "Some very [[simple]] text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
    int references = 100;
    int redirectTargetNamespace = -1;

    ArrayList<Redirect> redirects = new ArrayList<Redirect>();
    redirects.add(new Redirect(0, "Redirect", 2));

    ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
    rel.add(new RelatedTitle(new Title(0, "Related test"), 50));

    Hashtable<String, Integer> anchors = new Hashtable<String, Integer>();
    anchors.put("Anchor", 20);

    Date date = new Date();
    Article article =
            new Article(
                    10,
                    0,
                    "Test page",
                    text,
                    null,
                    references,
                    redirectTargetNamespace,
                    0,
                    redirects,
                    rel,
                    anchors,
                    date);

    analyzer = Analyzers.getHighlightAnalyzer(iid, false);
    try {
        doc = WikiIndexModifier.makeHighlightDocument(article, new FieldBuilder(iid), iid);
        assertEquals("1 [10]", tokens("pageid"));
        assertEquals("1 [0:Test page]", tokens("key"));
    } catch (IOException e) {
        // Report what went wrong instead of failing with no diagnostic.
        fail("Unexpected IOException from makeHighlightDocument: " + e);
    }
}