@Test
public void testWordListPath() throws Exception {
  // Environment used by Analysis to resolve word-list files on disk.
  Environment environment = new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS);

  // Write the expected tokens to a temp file and point the *_path setting at it.
  String[] expectedWords = {"donau", "dampf", "schiff", "spargel", "creme", "suppe"};
  File listFile = generateWordList(expectedWords);
  Settings settings =
      settingsBuilder()
          .loadFromSource("index: \n word_list_path: " + listFile.getAbsolutePath())
          .build();

  // Looking up "index.word_list" presumably falls back to the "_path" variant and
  // loads the file contents — confirmed by the assertions below matching the file.
  Set<String> loaded = Analysis.getWordSet(environment, settings, "index.word_list");
  MatcherAssert.assertThat(loaded.size(), equalTo(6));
  MatcherAssert.assertThat(loaded, hasItems(expectedWords));
}
private void testSimpleConfiguration(Settings settings) { Index index = new Index("test"); Injector parentInjector = new ModulesBuilder() .add( new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()) .createInjector(); Injector injector = new ModulesBuilder() .add( new IndexSettingsModule(index, settings), new IndexNameModule(index), new AnalysisModule( settings, parentInjector.getInstance(IndicesAnalysisService.class))) .createChildInjector(parentInjector); AnalysisService analysisService = injector.getInstance(AnalysisService.class); Analyzer analyzer = analysisService.analyzer("custom1").analyzer(); assertThat(analyzer, instanceOf(CustomAnalyzer.class)); CustomAnalyzer custom1 = (CustomAnalyzer) analyzer; assertThat(custom1.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class)); assertThat(custom1.tokenFilters().length, equalTo(2)); StopTokenFilterFactory stop1 = (StopTokenFilterFactory) custom1.tokenFilters()[0]; assertThat(stop1.stopWords().size(), equalTo(1)); assertThat((Iterable<String>) stop1.stopWords(), hasItem("test-stop")); analyzer = analysisService.analyzer("custom2").analyzer(); assertThat(analyzer, instanceOf(CustomAnalyzer.class)); CustomAnalyzer custom2 = (CustomAnalyzer) analyzer; // HtmlStripCharFilterFactory html = (HtmlStripCharFilterFactory) // custom2.charFilters()[0]; // assertThat(html.readAheadLimit(), equalTo(HTMLStripCharFilter.DEFAULT_READ_AHEAD)); // // html = (HtmlStripCharFilterFactory) custom2.charFilters()[1]; // assertThat(html.readAheadLimit(), equalTo(1024)); // verify characters mapping analyzer = analysisService.analyzer("custom5").analyzer(); assertThat(analyzer, instanceOf(CustomAnalyzer.class)); CustomAnalyzer custom5 = (CustomAnalyzer) analyzer; assertThat(custom5.tokenFilters()[0], instanceOf(MappingCharFilterFactory.class)); // verify aliases analyzer = analysisService.analyzer("alias1").analyzer(); assertThat(analyzer, instanceOf(StandardAnalyzer.class)); 
// check phonetic analyzer = analysisService.analyzer("custom3").analyzer(); assertThat(analyzer, instanceOf(CustomAnalyzer.class)); CustomAnalyzer custom3 = (CustomAnalyzer) analyzer; assertThat(custom3.tokenFilters()[0], instanceOf(PhoneticTokenFilterFactory.class)); // check custom class name (my) analyzer = analysisService.analyzer("custom4").analyzer(); assertThat(analyzer, instanceOf(CustomAnalyzer.class)); CustomAnalyzer custom4 = (CustomAnalyzer) analyzer; assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class)); // // verify Czech stemmer // analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer(); // assertThat(analyzer, instanceOf(CustomAnalyzer.class)); // CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer; // assertThat(czechstemmeranalyzer.tokenizerFactory(), // instanceOf(StandardTokenizerFactory.class)); // assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4)); // assertThat(czechstemmeranalyzer.tokenFilters()[3], // instanceOf(CzechStemTokenFilterFactory.class)); // // // check dictionary decompounder // analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer(); // assertThat(analyzer, instanceOf(CustomAnalyzer.class)); // CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer; // assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), // instanceOf(StandardTokenizerFactory.class)); // assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1)); // assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], // instanceOf(DictionaryCompoundWordTokenFilterFactory.class)); Set<String> wordList = Analysis.getWordSet(null, settings, "index.analysis.filter.dict_dec.word_list"); MatcherAssert.assertThat(wordList.size(), equalTo(6)); MatcherAssert.assertThat( wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe")); }