/**
 * Looks up each Kuromoji component by its registered name and checks that the analysis
 * service hands back the expected implementation class.
 */
public void testDefaultsKuromojiAnalysis() throws IOException {
    final AnalysisService service = createAnalysisService();

    // Tokenizer.
    final TokenizerFactory kuromojiTokenizer = service.tokenizer("kuromoji_tokenizer");
    assertThat(kuromojiTokenizer, instanceOf(KuromojiTokenizerFactory.class));

    // Token filters, one lookup per registered name.
    final TokenFilterFactory partOfSpeech = service.tokenFilter("kuromoji_part_of_speech");
    assertThat(partOfSpeech, instanceOf(KuromojiPartOfSpeechFilterFactory.class));

    final TokenFilterFactory readingForm = service.tokenFilter("kuromoji_readingform");
    assertThat(readingForm, instanceOf(KuromojiReadingFormFilterFactory.class));

    final TokenFilterFactory baseForm = service.tokenFilter("kuromoji_baseform");
    assertThat(baseForm, instanceOf(KuromojiBaseFormFilterFactory.class));

    final TokenFilterFactory stemmer = service.tokenFilter("kuromoji_stemmer");
    assertThat(stemmer, instanceOf(KuromojiKatakanaStemmerFactory.class));

    final TokenFilterFactory stopFilter = service.tokenFilter("ja_stop");
    assertThat(stopFilter, instanceOf(JapaneseStopTokenFilterFactory.class));

    // Analyzers: the prebuilt one and the custom one from the test configuration.
    final NamedAnalyzer kuromoji = service.analyzer("kuromoji");
    assertThat(kuromoji.analyzer(), instanceOf(JapaneseAnalyzer.class));

    final NamedAnalyzer custom = service.analyzer("my_analyzer");
    assertThat(custom.analyzer(), instanceOf(CustomAnalyzer.class));
    // The stream produced for empty input is expected to be the tokenizer itself.
    assertThat(
            custom.analyzer().tokenStream(null, new StringReader("")),
            instanceOf(JapaneseTokenizer.class));

    // Char filter.
    final CharFilterFactory iterationMark = service.charFilter("kuromoji_iteration_mark");
    assertThat(iterationMark, instanceOf(KuromojiIterationMarkCharFilterFactory.class));
}
/**
 * Builds two custom analyzers over the whitespace tokenizer and checks their output for
 * the input {@code "J2SE j2ee"}:
 * <ul>
 *   <li>{@code custom_analyzer} uses the filter declared under the camelCase name
 *       {@code wordDelimiter} with {@code split_on_numerics} set to {@code false}, and is
 *       expected to emit the two lowercased words unchanged;</li>
 *   <li>{@code custom_analyzer_1} uses the stock {@code word_delimiter} filter and is
 *       expected to emit six tokens, split at the letter/digit boundaries.</li>
 * </ul>
 *
 * @throws IOException if building the analyzers or consuming a token stream fails
 */
public void testConfigureCamelCaseTokenFilter() throws IOException {
    Settings settings =
            Settings.builder()
                    .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                    .build();
    Settings indexSettings =
            Settings.builder()
                    .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                    .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
                    .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
                    .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
                    .putArray(
                            "index.analysis.analyzer.custom_analyzer.filter",
                            "lowercase",
                            "wordDelimiter")
                    .put("index.analysis.analyzer.custom_analyzer_1.tokenizer", "whitespace")
                    .putArray(
                            "index.analysis.analyzer.custom_analyzer_1.filter",
                            "lowercase",
                            "word_delimiter")
                    .build();

    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    IndexAnalyzers indexAnalyzers =
            new AnalysisModule(new Environment(settings), emptyList())
                    .getAnalysisRegistry()
                    .build(idxSettings);

    try (NamedAnalyzer customAnalyzer = indexAnalyzers.get("custom_analyzer")) {
        assertNotNull(customAnalyzer);
        List<String> tokens = collectTerms(customAnalyzer, "foo", "J2SE j2ee");
        assertEquals(tokens.toString(), 2, tokens.size());
        assertEquals("j2se", tokens.get(0));
        assertEquals("j2ee", tokens.get(1));
    }

    try (NamedAnalyzer customAnalyzer = indexAnalyzers.get("custom_analyzer_1")) {
        assertNotNull(customAnalyzer);
        List<String> tokens = collectTerms(customAnalyzer, "foo", "J2SE j2ee");
        assertEquals(tokens.toString(), 6, tokens.size());
        assertEquals("j", tokens.get(0));
        assertEquals("2", tokens.get(1));
        assertEquals("se", tokens.get(2));
        assertEquals("j", tokens.get(3));
        assertEquals("2", tokens.get(4));
        assertEquals("ee", tokens.get(5));
    }
}

/**
 * Runs {@code text} through {@code analyzer} and returns the emitted terms in order.
 *
 * <p>Follows the full TokenStream consumer workflow (reset, incrementToken until
 * exhausted, end, close) so the stream is always released, even if consumption fails.
 */
private static List<String> collectTerms(NamedAnalyzer analyzer, String field, String text)
        throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream stream = analyzer.tokenStream(field, text)) {
        CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            terms.add(termAttribute.toString());
        }
        stream.end();
    }
    return terms;
}
@Override public void close() { for (NamedAnalyzer analyzer : analyzers.values()) { if (analyzer.scope() == AnalyzerScope.INDEX) { try { analyzer.close(); } catch (NullPointerException e) { // because analyzers are aliased, they might be closed several times // an NPE is thrown in this case, so ignore.... } catch (Exception e) { logger.debug("failed to close analyzer " + analyzer); } } } }
@Override public void close() { for (NamedAnalyzer analyzer : analyzers.values()) { if (analyzer.scope() == AnalyzerScope.INDEX) { try { analyzer.close(); } catch (NullPointerException e) { // because analyzers are aliased, they might be closed several times // an NPE is thrown in this case, so ignore.... // TODO: Analyzer's can no longer have aliases in indices created in 5.x and beyond, // so we only allow the aliases for analyzers on indices created pre 5.x for backwards // compatibility. Once pre 5.0 indices are no longer supported, this check should be // removed. } catch (Exception e) { logger.debug("failed to close analyzer {}", analyzer); } } } }
/**
 * Loads the analysis configuration from {@code stop.json} and checks that both configured
 * analyzers produce no tokens at all for the phrase "to be or not to be".
 */
public void testDefaultsCompoundAnalysis() throws Exception {
    final String resource = "/org/elasticsearch/index/analysis/stop.json";
    final Settings.Builder builder = Settings.builder();
    builder.loadFromStream(resource, getClass().getResourceAsStream(resource));
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    builder.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
    final Settings settings = builder.build();

    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings);
    final AnalysisService service = createAnalysisService(indexSettings, settings);

    // Every word of the phrase is expected to be dropped by each analyzer.
    for (String analyzerName : new String[] {"analyzer1", "analyzer2"}) {
        final NamedAnalyzer analyzer = service.analyzer(analyzerName);
        assertTokenStreamContents(
                analyzer.tokenStream("test", "to be or not to be"), new String[0]);
    }
}
public void testHtmlStripCharFilter() throws Exception { Settings settings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); AnalysisService analysisService = new AnalysisRegistry(null, new Environment(settings)).build(idxSettings); NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter"); assertTokenStreamContents( analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"}); // Repeat one more time to make sure that char filter is reinitialized correctly assertTokenStreamContents( analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"}); }
@Test public void testMappingCharFilter() throws Exception { Index index = new Index("test"); Settings settings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.char_filter.my_mapping.type", "mapping") .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q") .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping") .put("path.home", createTempDir().toString()) .build(); Injector parentInjector = new ModulesBuilder() .add( new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()) .createInjector(); Injector injector = new ModulesBuilder() .add( new IndexSettingsModule(index, settings), new IndexNameModule(index), new AnalysisModule( settings, parentInjector.getInstance(IndicesAnalysisService.class))) .createChildInjector(parentInjector); AnalysisService analysisService = injector.getInstance(AnalysisService.class); NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter"); assertTokenStreamContents( analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"}); // Repeat one more time to make sure that char filter is reinitialized correctly assertTokenStreamContents( analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"}); }
private void processAnalyzerFactory( String name, AnalyzerProvider<?> analyzerFactory, Map<String, NamedAnalyzer> analyzerAliases, Map<String, NamedAnalyzer> analyzers) { /* * Lucene defaults positionIncrementGap to 0 in all analyzers but * Elasticsearch defaults them to 0 only before version 2.0 * and 100 afterwards so we override the positionIncrementGap if it * doesn't match here. */ int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP; if (analyzerFactory instanceof CustomAnalyzerProvider) { ((CustomAnalyzerProvider) analyzerFactory).build(this); /* * Custom analyzers already default to the correct, version * dependent positionIncrementGap and the user is be able to * configure the positionIncrementGap directly on the analyzer so * we disable overriding the positionIncrementGap to preserve the * user's setting. */ overridePositionIncrementGap = Integer.MIN_VALUE; } Analyzer analyzerF = analyzerFactory.get(); if (analyzerF == null) { throw new IllegalArgumentException( "analyzer [" + analyzerFactory.name() + "] created null analyzer"); } NamedAnalyzer analyzer; if (analyzerF instanceof NamedAnalyzer) { // if we got a named analyzer back, use it... analyzer = (NamedAnalyzer) analyzerF; if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { // unless the positionIncrementGap needs to be overridden analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); } } else { analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); } if (analyzers.containsKey(name)) { throw new IllegalStateException("already registered analyzer with name: " + name); } analyzers.put(name, analyzer); // TODO: remove alias support completely when we no longer support pre 5.0 indices final String analyzerAliasKey = "index.analysis.analyzer." 
+ analyzerFactory.name() + ".alias"; if (indexSettings.getSettings().get(analyzerAliasKey) != null) { if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_5_0_0_alpha6)) { // do not allow alias creation if the index was created on or after v5.0 alpha6 throw new IllegalArgumentException("setting [" + analyzerAliasKey + "] is not supported"); } // the setting is now removed but we only support it for loading indices created before v5.0 deprecationLogger.deprecated( "setting [{}] is only allowed on index [{}] because it was created before 5.x; " + "analyzer aliases can no longer be created on new indices.", analyzerAliasKey, index().getName()); Set<String> aliases = Sets.newHashSet(indexSettings.getSettings().getAsArray(analyzerAliasKey)); for (String alias : aliases) { if (analyzerAliases.putIfAbsent(alias, analyzer) != null) { throw new IllegalStateException( "alias [" + alias + "] is already used by [" + analyzerAliases.get(alias).name() + "]"); } } } }
public AnalysisService( IndexSettings indexSettings, Map<String, AnalyzerProvider> analyzerProviders, Map<String, TokenizerFactory> tokenizerFactoryFactories, Map<String, CharFilterFactory> charFilterFactoryFactories, Map<String, TokenFilterFactory> tokenFilterFactoryFactories) { super(indexSettings); this.tokenizers = unmodifiableMap(tokenizerFactoryFactories); this.charFilters = unmodifiableMap(charFilterFactoryFactories); this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories); analyzerProviders = new HashMap<>(analyzerProviders); if (!analyzerProviders.containsKey("default")) { analyzerProviders.put( "default", new StandardAnalyzerProvider( indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS)); } if (!analyzerProviders.containsKey("default_search")) { analyzerProviders.put("default_search", analyzerProviders.get("default")); } if (!analyzerProviders.containsKey("default_search_quoted")) { analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search")); } Map<String, NamedAnalyzer> analyzers = new HashMap<>(); for (Map.Entry<String, AnalyzerProvider> entry : analyzerProviders.entrySet()) { AnalyzerProvider analyzerFactory = entry.getValue(); String name = entry.getKey(); /* * Lucene defaults positionIncrementGap to 0 in all analyzers but * Elasticsearch defaults them to 0 only before version 2.0 * and 100 afterwards so we override the positionIncrementGap if it * doesn't match here. */ int overridePositionIncrementGap = StringFieldMapper.Defaults.POSITION_INCREMENT_GAP; if (analyzerFactory instanceof CustomAnalyzerProvider) { ((CustomAnalyzerProvider) analyzerFactory).build(this); /* * Custom analyzers already default to the correct, version * dependent positionIncrementGap and the user is be able to * configure the positionIncrementGap directly on the analyzer so * we disable overriding the positionIncrementGap to preserve the * user's setting. 
*/ overridePositionIncrementGap = Integer.MIN_VALUE; } Analyzer analyzerF = analyzerFactory.get(); if (analyzerF == null) { throw new IllegalArgumentException( "analyzer [" + analyzerFactory.name() + "] created null analyzer"); } NamedAnalyzer analyzer; if (analyzerF instanceof NamedAnalyzer) { // if we got a named analyzer back, use it... analyzer = (NamedAnalyzer) analyzerF; if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { // unless the positionIncrementGap needs to be overridden analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); } } else { analyzer = new NamedAnalyzer( name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); } if (analyzers.containsKey(name)) { throw new IllegalStateException("already registered analyzer with name: " + name); } analyzers.put(name, analyzer); String strAliases = this.indexSettings .getSettings() .get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); if (strAliases != null) { for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { analyzers.put(alias, analyzer); } } String[] aliases = this.indexSettings .getSettings() .getAsArray("index.analysis.analyzer." 
+ analyzerFactory.name() + ".alias"); for (String alias : aliases) { analyzers.put(alias, analyzer); } } NamedAnalyzer defaultAnalyzer = analyzers.get("default"); if (defaultAnalyzer == null) { throw new IllegalArgumentException("no default analyzer configured"); } if (analyzers.containsKey("default_index")) { final Version createdVersion = indexSettings.getIndexVersionCreated(); if (createdVersion.onOrAfter(Version.V_3_0_0)) { throw new IllegalArgumentException( "setting [index.analysis.analyzer.default_index] is not supported anymore, use [index.analysis.analyzer.default] instead for index [" + index().getName() + "]"); } else { deprecationLogger.deprecated( "setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index().getName()); } } defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer; defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer; defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer; for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) { if (analyzer.getKey().startsWith("_")) { throw new IllegalArgumentException( "analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\""); } } this.analyzers = unmodifiableMap(analyzers); }