Code example #1
  @Test
  public void testWordListPath() throws Exception {
    Environment env = new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS);
    String[] words = new String[] {"donau", "dampf", "schiff", "spargel", "creme", "suppe"};

    File wordListFile = generateWordList(words);
    Settings settings =
        settingsBuilder()
            .loadFromSource("index: \n  word_list_path: " + wordListFile.getAbsolutePath())
            .build();

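    // getWordSet accepts either an inline list under "index.word_list" or a file path
    // under "index.word_list_path"; here the six words are read back from the generated file.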
    Set<String> wordList = Analysis.getWordSet(env, settings, "index.word_list");
    MatcherAssert.assertThat(wordList.size(), equalTo(6));
    MatcherAssert.assertThat(wordList, hasItems(words));
  }
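The generateWordList helper is not shown in the listing. A minimal sketch, assuming it simply writes one word per line to a temporary file, could look like this (java.io.File, BufferedWriter, and FileWriter are the only extra imports it needs):

  // Assumed helper, not part of the original listing: write each word on its own line
  // to a temp file and hand the file back to the caller.
  private File generateWordList(String[] words) throws Exception {
    File wordListFile = File.createTempFile("wordlist", ".txt");
    wordListFile.deleteOnExit();
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(wordListFile))) {
      for (String word : words) {
        writer.write(word);
        writer.write('\n');
      }
    }
    return wordListFile;
  }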
Code example #2
  private void testSimpleConfiguration(Settings settings) {
    Index index = new Index("test");
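    // Node-level bindings (settings, environment, shared analysis components) go into a
    // parent injector; the index-scoped AnalysisModule is then wired up in a child injector.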
    Injector parentInjector =
        new ModulesBuilder()
            .add(
                new SettingsModule(settings),
                new EnvironmentModule(new Environment(settings)),
                new IndicesAnalysisModule())
            .createInjector();
    Injector injector =
        new ModulesBuilder()
            .add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(
                    settings, parentInjector.getInstance(IndicesAnalysisService.class)))
            .createChildInjector(parentInjector);

    AnalysisService analysisService = injector.getInstance(AnalysisService.class);

    Analyzer analyzer = analysisService.analyzer("custom1").analyzer();

    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom1 = (CustomAnalyzer) analyzer;
    assertThat(custom1.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(custom1.tokenFilters().length, equalTo(2));

    StopTokenFilterFactory stop1 = (StopTokenFilterFactory) custom1.tokenFilters()[0];
    assertThat(stop1.stopWords().size(), equalTo(1));
    assertThat((Iterable<String>) stop1.stopWords(), hasItem("test-stop"));

    analyzer = analysisService.analyzer("custom2").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom2 = (CustomAnalyzer) analyzer;

    // HtmlStripCharFilterFactory html = (HtmlStripCharFilterFactory) custom2.charFilters()[0];
    // assertThat(html.readAheadLimit(), equalTo(HTMLStripCharFilter.DEFAULT_READ_AHEAD));
    //
    // html = (HtmlStripCharFilterFactory) custom2.charFilters()[1];
    // assertThat(html.readAheadLimit(), equalTo(1024));

    // verify character mapping
    analyzer = analysisService.analyzer("custom5").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom5 = (CustomAnalyzer) analyzer;
    assertThat(custom5.charFilters()[0], instanceOf(MappingCharFilterFactory.class));

    // verify aliases
    analyzer = analysisService.analyzer("alias1").analyzer();
    assertThat(analyzer, instanceOf(StandardAnalyzer.class));

    // check phonetic
    analyzer = analysisService.analyzer("custom3").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom3 = (CustomAnalyzer) analyzer;
    assertThat(custom3.tokenFilters()[0], instanceOf(PhoneticTokenFilterFactory.class));

    // check custom class name (my)
    analyzer = analysisService.analyzer("custom4").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom4 = (CustomAnalyzer) analyzer;
    assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));

    // // verify Czech stemmer
    // analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
    // assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    // CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
    // assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    // assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
    // assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
    //
    // // check dictionary decompounder
    // analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
    // assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    // CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
    // assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    // assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
    // assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));

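    // Read the word_list of the (presumably dictionary-decompounder) "dict_dec" filter
    // straight from the settings and check the six expected compound parts.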
    Set<String> wordList =
        Analysis.getWordSet(null, settings, "index.analysis.filter.dict_dec.word_list");
    MatcherAssert.assertThat(wordList.size(), equalTo(6));
    MatcherAssert.assertThat(
        wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
  }
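testSimpleConfiguration only receives a Settings object, so it needs a driver test. A hypothetical driver, assuming the analyzer definitions (custom1 through custom5, alias1, the dict_dec filter, and so on) sit in a YAML fixture on the test classpath, might look like this; the fixture path is an assumption, not taken from the listing:

  // Hypothetical driver test: the fixture path and its contents are assumed, not shown above.
  @Test
  public void testSimpleConfigurationYaml() {
    Settings settings =
        settingsBuilder()
            .loadFromClasspath("org/elasticsearch/index/analysis/test1.yml")
            .build();
    testSimpleConfiguration(settings);
  }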