public void testKeepWordsPathSettings() { Settings settings = Settings.settingsBuilder() .put("path.home", createTempDir().toString()) .put("index.analysis.filter.non_broken_keep_filter.type", "keep") .put( "index.analysis.filter.non_broken_keep_filter.keep_words_path", "does/not/exists.txt") .build(); try { // test our none existing setup is picked up AnalysisTestsHelper.createAnalysisServiceFromSettings(settings); fail("expected an exception due to non existent keep_words_path"); } catch (IllegalArgumentException e) { } catch (IOException e) { fail("expected IAE"); } settings = Settings.settingsBuilder() .put(settings) .put("index.analysis.filter.non_broken_keep_filter.keep_words", new String[] {"test"}) .build(); try { // test our none existing setup is picked up AnalysisTestsHelper.createAnalysisServiceFromSettings(settings); fail( "expected an exception indicating that you can't use [keep_words_path] with [keep_words] "); } catch (IllegalArgumentException e) { } catch (IOException e) { fail("expected IAE"); } }
/**
 * Checks that the "limit" token filter honors its {@code max_token_count} and
 * {@code consume_all_tokens} settings on a fixed whitespace-tokenized source.
 *
 * <p>The original body repeated the same setup/assert sequence three times with
 * different settings; it is now factored into a single parameterized helper.
 */
@Test
public void testSettings() throws IOException {
  // limit of 3: only the first three tokens survive
  assertLimitFilterOutput(3, true, new String[] {"the", "quick", "brown"});
  // consume_all_tokens=false produces the same visible output; only stream
  // consumption behavior differs, which this assertion does not observe
  assertLimitFilterOutput(3, false, new String[] {"the", "quick", "brown"});
  // limit larger than the token count: everything passes through
  assertLimitFilterOutput(17, true, new String[] {"the", "quick", "brown", "fox"});
}

/**
 * Builds a "limit" filter with the given settings and asserts the tokens it
 * emits for the fixed source "the quick brown fox".
 *
 * @param maxTokenCount value for {@code max_token_count}
 * @param consumeAllTokens value for {@code consume_all_tokens}
 * @param expected the token strings the filter must produce
 */
private void assertLimitFilterOutput(
    int maxTokenCount, boolean consumeAllTokens, String[] expected) throws IOException {
  Settings settings =
      ImmutableSettings.settingsBuilder()
          .put("index.analysis.filter.limit_1.type", "limit")
          .put("index.analysis.filter.limit_1.max_token_count", maxTokenCount)
          .put("index.analysis.filter.limit_1.consume_all_tokens", consumeAllTokens)
          .build();
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
  TokenFilterFactory tokenFilter = analysisService.tokenFilter("limit_1");
  String source = "the quick brown fox";
  Tokenizer tokenizer = new WhitespaceTokenizer();
  tokenizer.setReader(new StringReader(source));
  assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
/**
 * Checks the "limit" filter defaults: with no {@code max_token_count}
 * configured, only the first token of the stream is kept — both for an
 * explicitly registered filter and for the built-in "limit" name.
 *
 * <p>The original body duplicated the assert sequence for each filter name;
 * it is now factored into a private helper.
 */
@Test
public void testDefault() throws IOException {
  Settings settings =
      ImmutableSettings.settingsBuilder()
          .put("index.analysis.filter.limit_default.type", "limit")
          .build();
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
  assertDefaultLimitOutput(analysisService, "limit_default");
  assertDefaultLimitOutput(analysisService, "limit");
}

/**
 * Asserts that the named filter, with default settings, keeps only the first
 * token of "the quick brown fox".
 */
private void assertDefaultLimitOutput(AnalysisService analysisService, String filterName)
    throws IOException {
  TokenFilterFactory tokenFilter = analysisService.tokenFilter(filterName);
  String source = "the quick brown fox";
  String[] expected = new String[] {"the"};
  Tokenizer tokenizer = new WhitespaceTokenizer();
  tokenizer.setReader(new StringReader(source));
  assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
/**
 * Checks the {@code cjk_han_only} filter: Han characters are emitted as
 * bigram/single tokens while kana characters pass through individually.
 *
 * <p>NOTE(review): modernized from the deprecated
 * {@code new StandardTokenizer(TEST_VERSION_CURRENT, reader)} constructor to
 * the no-arg constructor plus {@code setReader}, matching every other test in
 * this file — confirm the Lucene version in this branch supports it.
 */
@Test
public void testHanOnly() throws IOException {
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE);
  TokenFilterFactory tokenFilter = analysisService.tokenFilter("cjk_han_only");
  String source = "多くの学生が試験に落ちた。";
  String[] expected =
      new String[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"};
  Tokenizer tokenizer = new StandardTokenizer();
  tokenizer.setReader(new StringReader(source));
  assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
}
/**
 * Verifies that the classpath-configured {@code my_keep_filter} keeps only the
 * listed words (case-insensitively) and preserves position increments across
 * the dropped token.
 */
public void testCaseInsensitiveMapping() throws IOException {
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
  TokenFilterFactory keepFilter = analysisService.tokenFilter("my_keep_filter");
  assertThat(keepFilter, instanceOf(KeepWordFilterFactory.class));

  String input = "hello small world";
  String[] expectedTokens = {"hello", "world"};
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader(input));
  // position increments {1, 2}: "small" was removed between the kept tokens
  assertTokenStreamContents(
      keepFilter.create(whitespaceTokenizer), expectedTokens, new int[] {1, 2});
}
/**
 * Verifies the {@code shingle_filler} filter substitutes its filler token for
 * stopwords removed upstream when building shingles.
 */
public void testFillerToken() throws IOException {
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
  TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle_filler");

  String input = "simon the sorcerer";
  // "the" is removed by the stop filter; the shingle filter fills the gap
  String[] expectedShingles = {"simon FILLER", "simon FILLER sorcerer", "FILLER sorcerer"};
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader(input));
  TokenStream withoutStopwords =
      new StopFilter(whitespaceTokenizer, StopFilter.makeStopSet("the"));
  assertTokenStreamContents(shingleFilter.create(withoutStopwords), expectedShingles);
}
/**
 * Verifies that {@code shingle_inverse} emits only the original unigrams when
 * the input is too short to form a shingle.
 */
public void testInverseMappingNoShingles() throws IOException {
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
  TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle_inverse");
  assertThat(shingleFilter, instanceOf(ShingleTokenFilterFactory.class));

  String input = "the quick";
  String[] expectedTokens = {"the", "quick"};
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader(input));
  assertTokenStreamContents(shingleFilter.create(whitespaceTokenizer), expectedTokens);
}
/**
 * Verifies the default "shingle" filter emits unigrams interleaved with
 * bigram shingles for a four-token input.
 */
public void testDefault() throws IOException {
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
  TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle");

  String input = "the quick brown fox";
  String[] expectedTokens = {
    "the", "the quick", "quick", "quick brown", "brown", "brown fox", "fox"
  };
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader(input));
  assertTokenStreamContents(shingleFilter.create(whitespaceTokenizer), expectedTokens);
}
/**
 * Verifies the default "asciifolding" filter folds diacritics to their ASCII
 * equivalents (u-umlaut becomes plain u).
 */
public void testDefault() throws IOException {
  Settings settings =
      settingsBuilder()
          .put("path.home", createTempDir().toString())
          .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
          .build();
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
  TokenFilterFactory foldingFilter = analysisService.tokenFilter("my_ascii_folding");

  String input = "Ansprüche";
  String[] expectedTokens = {"Anspruche"};
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader(input));
  assertTokenStreamContents(foldingFilter.create(whitespaceTokenizer), expectedTokens);
}
/**
 * Verifies that a "stop" filter configured with {@code remove_trailing=false}
 * is backed by a {@link SuggestStopFilter}, which keeps a trailing stopword
 * for suggest use cases.
 */
public void testThatSuggestStopFilterWorks() throws Exception {
  Settings settings =
      Settings.builder()
          .put("index.analysis.filter.my_stop.type", "stop")
          .put("index.analysis.filter.my_stop.remove_trailing", false)
          .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
          .build();
  ESTestCase.TestAnalysis analysis =
      AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
  TokenFilterFactory stopFactory = analysis.tokenFilter.get("my_stop");
  assertThat(stopFactory, instanceOf(StopTokenFilterFactory.class));

  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader("foo an"));
  TokenStream filtered = stopFactory.create(whitespaceTokenizer);
  // remove_trailing=false must select the suggest-friendly implementation
  assertThat(filtered, instanceOf(SuggestStopFilter.class));
}
/**
 * Verifies "asciifolding" with {@code preserve_original=true} emits both the
 * folded token and the original unfolded token.
 */
public void testPreserveOriginal() throws IOException {
  Settings settings =
      settingsBuilder()
          .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
          .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
          .put("index.analysis.filter.my_ascii_folding.preserve_original", true)
          .build();
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
  TokenFilterFactory foldingFilter = analysisService.tokenFilter("my_ascii_folding");

  String input = "Ansprüche";
  // folded variant first, then the preserved original
  String[] expectedTokens = {"Anspruche", "Ansprüche"};
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
  whitespaceTokenizer.setReader(new StringReader(input));
  assertTokenStreamContents(foldingFilter.create(whitespaceTokenizer), expectedTokens);
}
/**
 * Verifies that configuring a "keep" filter with both a words path and an
 * inline words array is rejected as over-configured.
 */
public void testLoadOverConfiguredSettings() {
  Settings settings =
      Settings.settingsBuilder()
          .put("path.home", createTempDir().toString())
          .put("index.analysis.filter.broken_keep_filter.type", "keep")
          .put("index.analysis.filter.broken_keep_filter.keep_words_path", "does/not/exists.txt")
          .put("index.analysis.filter.broken_keep_filter.keep_words", "[\"Hello\", \"worlD\"]")
          .build();
  try {
    AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
    Assert.fail("path and array are configured");
  } catch (IllegalArgumentException e) {
    // expected: keep_words_path and keep_words are mutually exclusive
  } catch (IOException e) {
    fail("expected IAE");
  }
}
public void testCorrectPositionIncrementSetting() throws IOException { Builder builder = Settings.builder().put("index.analysis.filter.my_stop.type", "stop"); if (random().nextBoolean()) { builder.put("index.analysis.filter.my_stop.version", Version.LATEST); } else { // don't specify } builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()); ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(builder.build()); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_stop"); assertThat(tokenFilter, instanceOf(StopTokenFilterFactory.class)); Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader("foo bar")); TokenStream create = tokenFilter.create(tokenizer); assertThat(create, instanceOf(StopFilter.class)); }
/**
 * Verifies that the removed {@code enable_position_increments} setting is
 * rejected with a descriptive error.
 */
public void testPositionIncrementSetting() throws IOException {
  Builder builder =
      Settings.builder()
          .put("index.analysis.filter.my_stop.type", "stop")
          .put("index.analysis.filter.my_stop.enable_position_increments", false);
  // the rejection must happen regardless of whether a version is specified
  if (random().nextBoolean()) {
    builder.put("index.analysis.filter.my_stop.version", "5.0");
  }
  builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
  Settings settings = builder.build();
  try {
    AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
    fail("Expected IllegalArgumentException");
  } catch (IllegalArgumentException e) {
    assertThat(
        e.getMessage(), containsString("enable_position_increments is not supported anymore"));
  }
}
/**
 * Verifies that the "keep" filter cannot be resolved without any settings:
 * looking it up yields {@code null} rather than a default instance.
 */
public void testLoadWithoutSettings() throws IOException {
  AnalysisService analysisService =
      AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
  TokenFilterFactory keepFilter = analysisService.tokenFilter("keep");
  Assert.assertNull(keepFilter);
}