/**
 * Verifies that a token filter registered under a camelCase name ("wordDelimiter",
 * with split_on_numerics=false) is resolved distinctly from the built-in
 * "word_delimiter" filter (default settings, which split on letter/number transitions).
 */
public void testConfigureCamelCaseTokenFilter() throws IOException {
    Settings settings =
        Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    // Two analyzers: "custom_analyzer" references the custom camelCase filter,
    // "custom_analyzer_1" references the built-in snake_case filter.
    Settings indexSettings =
        Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
            .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
            .putArray(
                "index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
            .put("index.analysis.analyzer.custom_analyzer_1.tokenizer", "whitespace")
            .putArray(
                "index.analysis.analyzer.custom_analyzer_1.filter", "lowercase", "word_delimiter")
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    IndexAnalyzers indexAnalyzers =
        new AnalysisModule(new Environment(settings), emptyList())
            .getAnalysisRegistry()
            .build(idxSettings);

    // split_on_numerics=false keeps "j2se" and "j2ee" as single tokens.
    try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer")) {
        assertNotNull(custom_analyser);
        List<String> token = analyzeToTerms(custom_analyser, "J2SE j2ee");
        assertEquals(token.toString(), 2, token.size());
        assertEquals("j2se", token.get(0));
        assertEquals("j2ee", token.get(1));
    }

    // Default word_delimiter splits on every letter/number transition.
    try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer_1")) {
        assertNotNull(custom_analyser);
        List<String> token = analyzeToTerms(custom_analyser, "J2SE j2ee");
        assertEquals(token.toString(), 6, token.size());
        assertEquals("j", token.get(0));
        assertEquals("2", token.get(1));
        assertEquals("se", token.get(2));
        assertEquals("j", token.get(3));
        assertEquals("2", token.get(4));
        assertEquals("ee", token.get(5));
    }
}

/**
 * Runs {@code text} through {@code analyzer} and returns the emitted terms.
 *
 * <p>Follows the full TokenStream contract (reset → incrementToken → end → close);
 * the original inline loops skipped end()/close(), leaking the stream.
 */
private static List<String> analyzeToTerms(NamedAnalyzer analyzer, String text)
        throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream tokenStream = analyzer.tokenStream("foo", text)) {
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            terms.add(charTermAttribute.toString());
        }
        tokenStream.end(); // required before close() per the TokenStream workflow
    }
    return terms;
}
/**
 * Loads analyzer definitions from a classpath JSON resource and verifies both
 * configured analyzers strip all stopwords from "to be or not to be".
 */
public void testDefaultsCompoundAnalysis() throws Exception {
    String json = "/org/elasticsearch/index/analysis/stop.json";
    Settings settings;
    // Close the classpath stream ourselves: loadFromStream is not documented to
    // close its argument, so the original code leaked the InputStream.
    // (Fully qualified to avoid relying on an import not visible in this chunk.)
    try (java.io.InputStream config = getClass().getResourceAsStream(json)) {
        settings =
            Settings.builder()
                .loadFromStream(json, config)
                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                .build();
    }
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    AnalysisService analysisService = createAnalysisService(idxSettings, settings);

    // Both analyzers are stopword-based, so an all-stopword phrase yields no tokens.
    NamedAnalyzer analyzer1 = analysisService.analyzer("analyzer1");
    assertTokenStreamContents(analyzer1.tokenStream("test", "to be or not to be"), new String[0]);
    NamedAnalyzer analyzer2 = analysisService.analyzer("analyzer2");
    assertTokenStreamContents(analyzer2.tokenStream("test", "to be or not to be"), new String[0]);
}
public void testHtmlStripCharFilter() throws Exception { Settings settings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); AnalysisService analysisService = new AnalysisRegistry(null, new Environment(settings)).build(idxSettings); NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter"); assertTokenStreamContents( analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"}); // Repeat one more time to make sure that char filter is reinitialized correctly assertTokenStreamContents( analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"}); }
@Test public void testMappingCharFilter() throws Exception { Index index = new Index("test"); Settings settings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.char_filter.my_mapping.type", "mapping") .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q") .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping") .put("path.home", createTempDir().toString()) .build(); Injector parentInjector = new ModulesBuilder() .add( new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()) .createInjector(); Injector injector = new ModulesBuilder() .add( new IndexSettingsModule(index, settings), new IndexNameModule(index), new AnalysisModule( settings, parentInjector.getInstance(IndicesAnalysisService.class))) .createChildInjector(parentInjector); AnalysisService analysisService = injector.getInstance(AnalysisService.class); NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter"); assertTokenStreamContents( analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"}); // Repeat one more time to make sure that char filter is reinitialized correctly assertTokenStreamContents( analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"}); }