/**
 * Builds a title document for a sample article against the "en-titles" index and checks its
 * alttitle, pageid, key, suffix, dbname, namespace and title fields.
 *
 * <p>An {@link IOException} raised while building or inspecting the document fails the test, and
 * the exception is included in the failure message so the cause is visible in the test report.
 */
public void testMakeTitleDocument() {
    IndexId iid = IndexId.get("en-titles");
    String text =
        "Some very simple text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
    int references = 100;
    int redirectTargetNamespace = -1;
    ArrayList<Redirect> redirects = new ArrayList<Redirect>();
    redirects.add(new Redirect(0, "Redirect", 2));
    ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
    rel.add(new RelatedTitle(new Title(0, "Related test"), 50));
    Hashtable<String, Integer> anchors = new Hashtable<String, Integer>();
    anchors.put("Anchor", 20);
    Date date = new Date();

    // NOTE(review): the leading 10 and 0 look like page id and namespace (the assertions below
    // expect pageid "wiki:10" and namespace "0") - confirm against the Article constructor.
    Article article =
        new Article(
            10,
            0,
            "Test page",
            text,
            null,
            references,
            redirectTargetNamespace,
            0,
            redirects,
            rel,
            anchors,
            date);

    // These are fields of the enclosing test class; tokens(...) and value(...) presumably read
    // them together with doc - verify against the helper definitions.
    analyzer = Analyzers.getIndexerAnalyzer(new FieldBuilder(iid));
    highlightAnalyzer = Analyzers.getHighlightAnalyzer(iid, false);

    try {
      doc =
          WikiIndexModifier.makeTitleDocument(
              article,
              analyzer,
              highlightAnalyzer,
              iid,
              "wiki",
              "enwiki",
              false,
              new HashSet<String>());
      assertEquals("1 [test] 1 [page] 255 [redirect]", tokens("alttitle"));
      assertEquals("wiki:10", value("pageid"));
      assertEquals("1 [wiki:0:Test page]", tokens("key"));
      assertEquals("wiki", value("suffix"));
      assertEquals("1 [enwiki]", tokens("dbname"));
      assertEquals("0", value("namespace"));
      assertEquals("Test page", value("title"));
    } catch (IOException e) {
      // Report the cause instead of failing silently.
      fail("Unexpected IOException while building the title document: " + e);
    }
  }
  /**
   * Indexes a sample article with a spell-check field builder and the stop words "of" and "for",
   * then checks the tokens of the contents and title fields and the collection decoded from the
   * spellcheck_context value.
   *
   * <p>An {@link IOException} fails the test with the exception's message.
   */
  public void testSpellcheck() {
    IndexId indexId = IndexId.get("enwiki");
    String wikiText =
        "Some very [[simple]] text used for testing, used for testing of something\n"
            + "== Heading 1 ==\n"
            + "Paragraph\n"
            + "[[Category:Category1]]";

    ArrayList<Redirect> redirectList = new ArrayList<Redirect>();
    redirectList.add(new Redirect(0, "Redirect", 2));

    ArrayList<RelatedTitle> relatedTitles = new ArrayList<RelatedTitle>();
    relatedTitles.add(new RelatedTitle(new Title(0, "Related test"), 50));

    Hashtable<String, Integer> anchorRanks = new Hashtable<String, Integer>();
    anchorRanks.put("Anchor", 20);

    Date timestamp = new Date();

    HashSet<String> ignoredWords = new HashSet<String>();
    ignoredWords.add("of");
    ignoredWords.add("for");

    // 100 references and a redirect target namespace of -1, passed directly as literals.
    Article sample =
        new Article(
            10, 0, "Test for page", wikiText, null, 100, -1, 0, redirectList, relatedTitles,
            anchorRanks, timestamp);

    FieldBuilder spellBuilder =
        new FieldBuilder(
            indexId,
            FieldBuilder.Case.IGNORE_CASE,
            FieldBuilder.Stemmer.NO_STEMMER,
            FieldBuilder.Options.SPELL_CHECK);
    spellBuilder.getBuilder().getFilters().setStopWords(ignoredWords);

    analyzer = Analyzers.getSpellCheckAnalyzer(indexId, ignoredWords);

    final String expectedContents =
        "1 [test] 1 [for] 1 [page] 1 [test_for_page] 1 [some] 1 [page_some] 1 [very] 1 [some_very] 1 [simple] 1 [very_simple] 1 [text] 1 [simple_text] 1 [used] 1 [text_used] 1 [testing] 1 [used_for_testing] 1 [testing_used] 1 [of] 1 [something] 1 [testing_of_something] 1 [heading] 1 [something_heading] 1 [1] 1 [heading_1] 1 [paragraph] 1 [1_paragraph] 1 [category1] 1 [paragraph_category1] 1 [category1_test] 1 [anchor] 1 [page_anchor] 1 [redirect] 1 [anchor_redirect]";
    final String expectedTitle =
        "1 [test] 1 [for] 1 [test_for] 1 [page] 1 [for_page] 1 [test_for_page]";
    final String expectedContext = "[1, page, for, test, simple, heading]";

    try {
      doc =
          WikiIndexModifier.makeDocument(
              sample, spellBuilder, indexId, ignoredWords, analyzer, true);
      assertEquals(expectedContents, tokens("contents"));
      assertEquals(expectedTitle, tokens("title"));
      assertEquals(
          expectedContext,
          new StringList(value("spellcheck_context")).toCollection().toString());
    } catch (IOException ioFailure) {
      fail(ioFailure.getMessage());
    }
  }
  /**
   * Builds a regular index document for a sample article in the "enwiki" index and checks the
   * contents, title, alttitle, related, sections, category, key, rank, prefix and reverse_title
   * fields.
   *
   * <p>An {@link IOException} raised while building or inspecting the document fails the test, and
   * the exception is included in the failure message so the cause is visible in the test report.
   */
  public void testMakeDocuments() {
    IndexId iid = IndexId.get("enwiki");
    String text =
        "Some very [[simple]] text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
    int references = 100;
    int redirectTargetNamespace = -1;
    ArrayList<Redirect> redirects = new ArrayList<Redirect>();
    redirects.add(new Redirect(0, "Redirect", 2));
    ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
    rel.add(new RelatedTitle(new Title(0, "Related test"), 50));
    Hashtable<String, Integer> anchors = new Hashtable<String, Integer>();
    anchors.put("Anchor", 20);
    Date date = new Date();

    // NOTE(review): the leading 10 looks like the page id (the "key" assertion below expects
    // "10") - confirm against the Article constructor.
    Article article =
        new Article(
            10,
            0,
            "Test page",
            text,
            null,
            references,
            redirectTargetNamespace,
            0,
            redirects,
            rel,
            anchors,
            date);

    // A field of the enclosing test class; tokens(...) presumably uses it - verify.
    analyzer = Analyzers.getIndexerAnalyzer(new FieldBuilder(iid));
    try {
      doc =
          WikiIndexModifier.makeDocument(
              article, new FieldBuilder(iid), iid, new HashSet<String>(), analyzer);
      assertEquals(
          "1 [test] 1 [page] 3 [some] 1 [very] 1 [simple] 1 [text] 1 [used] 0 [use] 1 [for] 1 [testing] 0 [test] 500 [heading] 1 [1] 1 [paragraph] 1 [category1] 10 [test] 1 [page] 10 [anchor] 10 [redirect]",
          tokens("contents"));
      assertEquals("1 [test] 1 [page]", tokens("title"));
      assertEquals("1 [test] 1 [page] 255 [anchor] 256 [redirect]", tokens("alttitle"));
      assertEquals("1 [related] 1 [test]", tokens("related"));
      assertEquals("1 [heading] 1 [1]", tokens("sections"));
      assertEquals("1 [category1]", tokens("category"));
      assertEquals("10", value("key"));
      assertEquals("103", value("rank"));
      assertEquals("1 [0:test page]", tokens("prefix"));
      assertEquals("1 [egap] 1 [tset]", tokens("reverse_title"));

    } catch (IOException e) {
      // Report the cause instead of failing silently.
      fail("Unexpected IOException while building the index document: " + e);
    }
  }
// Code example #4
// 0
 /**
  * Parses the query "miniseries" for the "wikilucene.nspart1.sub1" index, keeps only the
  * extracted terms whose field is "contents", looks up their document frequencies on the local
  * searcher and runs the highlighter over a fixed list of hit keys.
  *
  * <p>The highlighter's result is not inspected, so this only checks that the whole pipeline
  * runs without throwing.
  *
  * @throws Exception if parsing, searching or highlighting fails
  */
 public static void testTokenizer() throws Exception {
   // Hit keys handed to the highlighter (presumably "namespace:title" - verify).
   String[] hitKeys = {
     "0:Douglas Adams", "0:The Hitchhiker's Guide to the Galaxy", "0:2001", "0:Boot"
   };
   ArrayList<String> hits = new ArrayList<String>();
   for (String hitKey : hitKeys) {
     hits.add(hitKey);
   }

   IndexId indexId = IndexId.get("wikilucene.nspart1.sub1");
   FieldBuilder.BuilderSet builderSet = new FieldBuilder(indexId).getBuilder();
   Analyzer searchAnalyzer = Analyzers.getSearcherAnalyzer(indexId);
   ArrayList<String> cachedStopWords = StopWords.getCached(indexId);

   WikiQueryParser queryParser =
       new WikiQueryParser(
           builderSet.getFields().contents(),
           new NamespaceFilter(0),
           searchAnalyzer,
           builderSet,
           NamespacePolicy.IGNORE,
           cachedStopWords);
   Query parsed = queryParser.parse("miniseries");

   // Collect every term of the query, then drop those that are not on the "contents" field.
   HashSet<Term> contentTerms = new HashSet<Term>();
   parsed.extractTerms(contentTerms);
   for (Iterator<Term> cursor = contentTerms.iterator(); cursor.hasNext(); ) {
     Term candidate = cursor.next();
     boolean onContents = candidate.field().equals("contents");
     if (!onContents) {
       cursor.remove();
     }
   }
   Term[] terms = contentTerms.toArray(new Term[0]);

   IndexSearcher localSearcher = SearcherCache.getInstance().getLocalSearcher(indexId);
   int[] docFrequencies = localSearcher.docFreqs(terms);

   Highlight.highlight(
       hits,
       indexId,
       terms,
       docFrequencies,
       localSearcher.maxDoc(),
       queryParser.getWordsClean(),
       StopWords.getPredefinedSet(indexId),
       false,
       null,
       false,
       false);
 }
  /**
   * Builds a highlight document for a sample article in the "enwiki" index and checks the tokens
   * of its pageid and key fields.
   *
   * <p>An {@link IOException} raised while building or inspecting the document fails the test, and
   * the exception is included in the failure message so the cause is visible in the test report.
   */
  public void testMakeHighlightDocuments() {
    IndexId iid = IndexId.get("enwiki");
    String text =
        "Some very [[simple]] text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
    int references = 100;
    int redirectTargetNamespace = -1;
    ArrayList<Redirect> redirects = new ArrayList<Redirect>();
    redirects.add(new Redirect(0, "Redirect", 2));
    ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
    rel.add(new RelatedTitle(new Title(0, "Related test"), 50));
    Hashtable<String, Integer> anchors = new Hashtable<String, Integer>();
    anchors.put("Anchor", 20);
    Date date = new Date();

    // NOTE(review): the leading 10 and 0 look like page id and namespace (the assertions below
    // expect pageid "10" and key "0:Test page") - confirm against the Article constructor.
    Article article =
        new Article(
            10,
            0,
            "Test page",
            text,
            null,
            references,
            redirectTargetNamespace,
            0,
            redirects,
            rel,
            anchors,
            date);

    // A field of the enclosing test class; tokens(...) presumably uses it - verify.
    analyzer = Analyzers.getHighlightAnalyzer(iid, false);
    try {
      doc = WikiIndexModifier.makeHighlightDocument(article, new FieldBuilder(iid), iid);
      assertEquals("1 [10]", tokens("pageid"));
      assertEquals("1 [0:Test page]", tokens("key"));
    } catch (IOException e) {
      // Report the cause instead of failing silently.
      fail("Unexpected IOException while building the highlight document: " + e);
    }
  }