public void testKatakanaStemFilter() throws IOException { AnalysisService analysisService = createAnalysisService(); TokenFilterFactory tokenFilter = analysisService.tokenFilter("kuromoji_stemmer"); assertThat(tokenFilter, instanceOf(KuromojiKatakanaStemmerFactory.class)); String source = "明後日パーティーに行く予定がある。図書館で資料をコピーしました。"; Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); tokenizer.setReader(new StringReader(source)); // パーティー should be stemmed by default // (min len) コピー should not be stemmed String[] expected_tokens_katakana = new String[] { "明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た" }; assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); tokenFilter = analysisService.tokenFilter("kuromoji_ks"); assertThat(tokenFilter, instanceOf(KuromojiKatakanaStemmerFactory.class)); tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); tokenizer.setReader(new StringReader(source)); // パーティー should not be stemmed since min len == 6 // コピー should not be stemmed expected_tokens_katakana = new String[] { "明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た" }; assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); }
public void testIterationMarkCharFilter() throws IOException { AnalysisService analysisService = createAnalysisService(); // test only kanji CharFilterFactory charFilterFactory = analysisService.charFilter("kuromoji_im_only_kanji"); assertNotNull(charFilterFactory); assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); String source = "ところゞゝゝ、ジヾが、時々、馬鹿々々しい"; String expected = "ところゞゝゝ、ジヾが、時時、馬鹿馬鹿しい"; assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); // test only kana charFilterFactory = analysisService.charFilter("kuromoji_im_only_kana"); assertNotNull(charFilterFactory); assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); expected = "ところどころ、ジジが、時々、馬鹿々々しい"; assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); // test default charFilterFactory = analysisService.charFilter("kuromoji_im_default"); assertNotNull(charFilterFactory); assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); expected = "ところどころ、ジジが、時時、馬鹿馬鹿しい"; assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); }
public void testDefaultsKuromojiAnalysis() throws IOException {
    AnalysisService analysisService = createAnalysisService();

    // The built-in kuromoji tokenizer, token filters and char filter must all
    // resolve to their dedicated factory classes.
    assertThat(
        analysisService.tokenizer("kuromoji_tokenizer"),
        instanceOf(KuromojiTokenizerFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_part_of_speech"),
        instanceOf(KuromojiPartOfSpeechFilterFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_readingform"),
        instanceOf(KuromojiReadingFormFilterFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_baseform"),
        instanceOf(KuromojiBaseFormFilterFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_stemmer"),
        instanceOf(KuromojiKatakanaStemmerFactory.class));
    assertThat(
        analysisService.tokenFilter("ja_stop"), instanceOf(JapaneseStopTokenFilterFactory.class));

    NamedAnalyzer kuromojiAnalyzer = analysisService.analyzer("kuromoji");
    assertThat(kuromojiAnalyzer.analyzer(), instanceOf(JapaneseAnalyzer.class));

    // The custom analyzer must be backed by the Japanese tokenizer.
    NamedAnalyzer customAnalyzer = analysisService.analyzer("my_analyzer");
    assertThat(customAnalyzer.analyzer(), instanceOf(CustomAnalyzer.class));
    assertThat(
        customAnalyzer.analyzer().tokenStream(null, new StringReader("")),
        instanceOf(JapaneseTokenizer.class));

    assertThat(
        analysisService.charFilter("kuromoji_iteration_mark"),
        instanceOf(KuromojiIterationMarkCharFilterFactory.class));
}
public void testBackCompatOverrideDefaultIndexAndSearchAnalyzer() {
    // Pick any index version created before 3.0.0 — the legacy
    // default_index/default_search override behavior only applies to those.
    Version version =
        VersionUtils.randomVersionBetween(
            getRandom(),
            VersionUtils.getFirstVersion(),
            VersionUtils.getPreviousVersion(Version.V_3_0_0));
    Settings settings =
        Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();

    Map<String, AnalyzerProvider> analyzers = new HashMap<>();
    analyzers.put("default_index", analyzerProvider("default_index"));
    analyzers.put("default_search", analyzerProvider("default_search"));
    AnalysisService analysisService =
        new AnalysisService(
            IndexSettingsModule.newIndexSettings("index", settings),
            analyzers,
            Collections.emptyMap(),
            Collections.emptyMap(),
            Collections.emptyMap());

    // All three defaults should resolve to the overriding English analyzer.
    assertThat(analysisService.defaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
    assertThat(
        analysisService.defaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
    assertThat(
        analysisService.defaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
}
@Test
public void testArirangAnalysis() {
    Index index = new Index("test");
    // Use the JVM temp dir for path.home instead of a developer-specific
    // absolute path ("/Users/hwjeong/temp/tmp") so the test runs on any machine.
    Settings settings =
        settingsBuilder()
            .put("path.home", System.getProperty("java.io.tmpdir"))
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    // Parent injector provides settings/environment; the child injector adds the
    // analysis module with the Arirang binder processor registered.
    Injector parentInjector =
        new ModulesBuilder()
            .add(
                new SettingsModule(EMPTY_SETTINGS),
                new EnvironmentModule(new Environment(settings)))
            .createInjector();
    Injector injector =
        new ModulesBuilder()
            .add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(
                        EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class))
                    .addProcessor(new ArirangAnalysisBinderProcessor()))
            .createChildInjector(parentInjector);
    AnalysisService analysisService = injector.getInstance(AnalysisService.class);
    // The plugin's tokenizer must be resolvable under its registered name.
    TokenizerFactory tokenizerFactory = analysisService.tokenizer("arirang_tokenizer");
    MatcherAssert.assertThat(tokenizerFactory, instanceOf(ArirangTokenizerFactory.class));
}
@Test
public void testDefault() throws IOException {
    // A "limit" filter with no extra settings keeps only the first token.
    Settings settings =
        ImmutableSettings.settingsBuilder()
            .put("index.analysis.filter.limit_default.type", "limit")
            .build();
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
    String source = "the quick brown fox";
    String[] expected = {"the"};
    // Exercise both the explicitly configured filter and the pre-built "limit" one.
    for (String filterName : new String[] {"limit_default", "limit"}) {
        TokenFilterFactory tokenFilter = analysisService.tokenFilter(filterName);
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader(source));
        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
    }
}
/**
 * Builds the per-index mapper service: wires the document mapper parser, wraps the
 * default index/search/search-quote analyzers, and prepares the built-in default
 * mapping sources.
 */
@Inject
public MapperService(
    Index index,
    @IndexSettings Settings indexSettings,
    AnalysisService analysisService,
    SimilarityLookupService similarityLookupService,
    ScriptService scriptService) {
  super(index, indexSettings);
  this.analysisService = analysisService;
  this.fieldTypes = new FieldTypeLookup();
  // Parser that turns mapping JSON into DocumentMappers.
  this.documentParser =
      new DocumentMapperParser(
          indexSettings, this, analysisService, similarityLookupService, scriptService);
  // Wrap the service-wide defaults; the extractor presumably lets per-field
  // analyzers override the default — confirm in MapperAnalyzerWrapper.
  this.indexAnalyzer =
      new MapperAnalyzerWrapper(analysisService.defaultIndexAnalyzer(), INDEX_ANALYZER_EXTRACTOR);
  this.searchAnalyzer =
      new MapperAnalyzerWrapper(
          analysisService.defaultSearchAnalyzer(), SEARCH_ANALYZER_EXTRACTOR);
  this.searchQuoteAnalyzer =
      new MapperAnalyzerWrapper(
          analysisService.defaultSearchQuoteAnalyzer(), SEARCH_QUOTE_ANALYZER_EXTRACTOR);
  // Dynamic mapping is on unless explicitly disabled in the index settings.
  this.dynamic = indexSettings.getAsBoolean("index.mapper.dynamic", true);
  // Percolator queries are stored as opaque (non-indexed) objects by default.
  defaultPercolatorMappingSource =
      "{\n"
          + "\"_default_\":{\n"
          + "\"properties\" : {\n"
          + "\"query\" : {\n"
          + "\"type\" : \"object\",\n"
          + "\"enabled\" : false\n"
          + "}\n"
          + "}\n"
          + "}\n"
          + "}";
  if (index.getName().equals(ScriptService.SCRIPT_INDEX)) {
    // The script index stores script/template sources without indexing them.
    defaultMappingSource =
        "{"
            + "\"_default_\": {"
            + "\"properties\": {"
            + "\"script\": { \"enabled\": false },"
            + "\"template\": { \"enabled\": false }"
            + "}"
            + "}"
            + "}";
  } else {
    defaultMappingSource = "{\"_default_\":{}}";
  }
  if (logger.isTraceEnabled()) {
    logger.trace(
        "using dynamic[{}], default mapping source[{}], default percolator mapping source[{}]",
        dynamic,
        defaultMappingSource,
        defaultPercolatorMappingSource);
  } else if (logger.isDebugEnabled()) {
    logger.debug("using dynamic[{}]", dynamic);
  }
}
@Test
public void testHanOnly() throws IOException {
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE);
    TokenFilterFactory hanOnlyFilter = analysisService.tokenFilter("cjk_han_only");
    String text = "多くの学生が試験に落ちた。";
    // Only Han runs are bigrammed (学生, 試験); kana characters stay as unigrams.
    String[] expected = {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"};
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    assertTokenStreamContents(hanOnlyFilter.create(tokenizer), expected);
}
public void testBaseFormFilterFactory() throws IOException {
    // NOTE(review): despite the method name, this exercises the part-of-speech
    // filter registered as "kuromoji_pos" — confirm the name is intentional.
    AnalysisService analysisService = createAnalysisService();
    TokenFilterFactory posFilter = analysisService.tokenFilter("kuromoji_pos");
    assertThat(posFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class));
    Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
    // The trailing verb 超える is removed by the configured stop tags.
    String[] expected = {"私", "は", "制限", "スピード", "を"};
    assertSimpleTSOutput(posFilter.create(tokenizer), expected);
}
public void testJapaneseStopFilterFactory() throws IOException {
    AnalysisService analysisService = createAnalysisService();
    TokenFilterFactory stopFilter = analysisService.tokenFilter("ja_stop");
    assertThat(stopFilter, instanceOf(JapaneseStopTokenFilterFactory.class));
    Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
    // Particles (は, を) and スピード are removed by the configured stopword list.
    String[] expected = {"私", "制限", "超える"};
    assertSimpleTSOutput(stopFilter.create(tokenizer), expected);
}
public void testKuromojiUserDict() throws IOException {
    AnalysisService analysisService = createAnalysisService();
    TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_user_dict");
    Tokenizer tokenizer = tokenizerFactory.create();
    tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
    // The user dictionary keeps 制限スピード as one token instead of splitting it.
    String[] expected = {"私", "は", "制限スピード", "を", "超える"};
    assertSimpleTSOutput(tokenizer, expected);
}
public void testFillerToken() throws IOException {
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle_filler");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("simon the sorcerer"));
    // Drop "the" first so the shingle filter must substitute its filler token.
    TokenStream stopped = new StopFilter(tokenizer, StopFilter.makeStopSet("the"));
    String[] expected = {"simon FILLER", "simon FILLER sorcerer", "FILLER sorcerer"};
    assertTokenStreamContents(shingleFilter.create(stopped), expected);
}
public void testDefault() throws IOException {
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the quick brown fox"));
    // Default shingle output interleaves each unigram with its bigram.
    String[] expected = {
        "the", "the quick", "quick", "quick brown", "brown", "brown fox", "fox"
    };
    assertTokenStreamContents(shingleFilter.create(tokenizer), expected);
}
public void testInverseMappingNoShingles() throws IOException {
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory inverseFilter = analysisService.tokenFilter("shingle_inverse");
    assertThat(inverseFilter, instanceOf(ShingleTokenFilterFactory.class));
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the quick"));
    // With this configuration no shingles are produced for a two-token input —
    // presumably the minimum shingle size exceeds 2; the unigrams pass through.
    assertTokenStreamContents(inverseFilter.create(tokenizer), new String[] {"the", "quick"});
}
public void testConfigureCamelCaseTokenFilter() throws IOException { // tests a filter that Settings settings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); Settings indexSettings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.filter.wordDelimiter.type", "word_delimiter") .put("index.analysis.filter.wordDelimiter.split_on_numerics", false) .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace") .putArray( "index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter") .put("index.analysis.analyzer.custom_analyzer_1.tokenizer", "whitespace") .putArray( "index.analysis.analyzer.custom_analyzer_1.filter", "lowercase", "word_delimiter") .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); AnalysisService analysisService = new AnalysisRegistry(null, new Environment(settings)).build(idxSettings); try (NamedAnalyzer custom_analyser = analysisService.analyzer("custom_analyzer")) { assertNotNull(custom_analyser); TokenStream tokenStream = custom_analyser.tokenStream("foo", "J2SE j2ee"); tokenStream.reset(); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); List<String> token = new ArrayList<>(); while (tokenStream.incrementToken()) { token.add(charTermAttribute.toString()); } assertEquals(token.toString(), 2, token.size()); assertEquals("j2se", token.get(0)); assertEquals("j2ee", token.get(1)); } try (NamedAnalyzer custom_analyser = analysisService.analyzer("custom_analyzer_1")) { assertNotNull(custom_analyser); TokenStream tokenStream = custom_analyser.tokenStream("foo", "J2SE j2ee"); tokenStream.reset(); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); List<String> token = new ArrayList<>(); while (tokenStream.incrementToken()) { token.add(charTermAttribute.toString()); } assertEquals(token.toString(), 6, 
token.size()); assertEquals("j", token.get(0)); assertEquals("2", token.get(1)); assertEquals("se", token.get(2)); assertEquals("j", token.get(3)); assertEquals("2", token.get(4)); assertEquals("ee", token.get(5)); } }
public void testCaseInsensitiveMapping() throws IOException {
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory keepFilter = analysisService.tokenFilter("my_keep_filter");
    assertThat(keepFilter, instanceOf(KeepWordFilterFactory.class));
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("hello small world"));
    // "small" is dropped; the position increments {1, 2} record the gap it leaves.
    assertTokenStreamContents(
        keepFilter.create(tokenizer), new String[] {"hello", "world"}, new int[] {1, 2});
}
@Test
public void testBasicUsage() throws Exception {
    Settings settings =
        ImmutableSettings.settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT)
            .put("index.analysis.analyzer.myanalyzer.type", "sortform")
            .put("index.analysis.analyzer.myanalyzer.filter", "sortform")
            .build();
    NamedAnalyzer analyzer = createAnalysisService(settings).analyzer("myanalyzer");
    // The sortform filter strips the <<...>> non-sorting markup from the title.
    assertAnalyzesTo(analyzer, "<<Der>> Titel des Buches", new String[] {"Titel des Buches"});
}
public void testDefault() throws IOException {
    Settings settings =
        settingsBuilder()
            .put("path.home", createTempDir().toString())
            .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
            .build();
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
    TokenFilterFactory foldingFilter = analysisService.tokenFilter("my_ascii_folding");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("Ansprüche"));
    // By default ü is folded to u and the original form is not preserved.
    assertTokenStreamContents(foldingFilter.create(tokenizer), new String[] {"Anspruche"});
}
@Test public void testUnicodeUsage() throws Exception { Settings settings = ImmutableSettings.settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT) .put("index.analysis.analyzer.myanalyzer.type", "sortform") .put("index.analysis.analyzer.myanalyzer.filter", "sortform") .build(); AnalysisService analysisService = createAnalysisService(settings); Analyzer myanalyzer = analysisService.analyzer("myanalyzer"); // Unicode 0098: START OF STRING // Unicode 009C: STRING TERMINATOR assertAnalyzesTo( myanalyzer, "\u0098Der\u009c Titel des Buches", new String[] {"Titel des Buches"}); }
public void testPreserveOriginal() throws IOException {
    Settings settings =
        settingsBuilder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
            .put("index.analysis.filter.my_ascii_folding.preserve_original", true)
            .build();
    AnalysisService analysisService =
        AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
    TokenFilterFactory foldingFilter = analysisService.tokenFilter("my_ascii_folding");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("Ansprüche"));
    // preserve_original emits the folded token followed by the unfolded original.
    assertTokenStreamContents(
        foldingFilter.create(tokenizer), new String[] {"Anspruche", "Ansprüche"});
}
/**
 * Creates and configures the shard's Lucene {@link IndexWriter}, releasing any stale
 * directory lock first. On failure the partially-initialized writer is closed before
 * the exception propagates.
 */
private IndexWriter createWriter() throws IOException {
    IndexWriter indexWriter = null;
    try {
        // release locks when started
        if (IndexWriter.isLocked(store.directory())) {
            logger.warn("shard is locked, releasing lock");
            IndexWriter.unlock(store.directory());
        }
        // Create a fresh index only if none exists in the directory yet.
        boolean create = !IndexReader.indexExists(store.directory());
        indexWriter =
            new IndexWriter(
                store.directory(),
                analysisService.defaultIndexAnalyzer(),
                create,
                deletionPolicy,
                IndexWriter.MaxFieldLength.UNLIMITED);
        // Apply the shard's merge, similarity and buffering configuration.
        indexWriter.setMergeScheduler(mergeScheduler.newMergeScheduler());
        indexWriter.setMergePolicy(mergePolicyProvider.newMergePolicy(indexWriter));
        indexWriter.setSimilarity(similarityService.defaultIndexSimilarity());
        indexWriter.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        indexWriter.setTermIndexInterval(termIndexInterval);
    } catch (IOException e) {
        // Avoid leaking a half-constructed writer (and its directory lock).
        safeClose(indexWriter);
        throw e;
    }
    return indexWriter;
}
/**
 * Builds the per-index mapper service: wires the document mapper parser, wraps the
 * default index/search/search-quote analyzers (per-field analyzers are resolved via
 * the provided extractor lambdas), and selects the default mapping source.
 */
public MapperService(
    IndexSettings indexSettings,
    AnalysisService analysisService,
    SimilarityService similarityService,
    MapperRegistry mapperRegistry,
    Supplier<QueryShardContext> queryShardContextSupplier) {
  super(indexSettings);
  this.analysisService = analysisService;
  this.fieldTypes = new FieldTypeLookup();
  // Parser that turns mapping JSON into DocumentMappers.
  this.documentParser =
      new DocumentMapperParser(
          indexSettings,
          this,
          analysisService,
          similarityService,
          mapperRegistry,
          queryShardContextSupplier);
  this.indexAnalyzer =
      new MapperAnalyzerWrapper(analysisService.defaultIndexAnalyzer(), p -> p.indexAnalyzer());
  this.searchAnalyzer =
      new MapperAnalyzerWrapper(analysisService.defaultSearchAnalyzer(), p -> p.searchAnalyzer());
  this.searchQuoteAnalyzer =
      new MapperAnalyzerWrapper(
          analysisService.defaultSearchQuoteAnalyzer(), p -> p.searchQuoteAnalyzer());
  this.mapperRegistry = mapperRegistry;
  this.dynamic = this.indexSettings.getValue(INDEX_MAPPER_DYNAMIC_SETTING);
  if (index().getName().equals(ScriptService.SCRIPT_INDEX)) {
    // The script index stores script/template sources without indexing them.
    defaultMappingSource =
        "{"
            + "\"_default_\": {"
            + "\"properties\": {"
            + "\"script\": { \"enabled\": false },"
            + "\"template\": { \"enabled\": false }"
            + "}"
            + "}"
            + "}";
  } else {
    defaultMappingSource = "{\"_default_\":{}}";
  }
  if (logger.isTraceEnabled()) {
    logger.trace("using dynamic[{}], default mapping source[{}]", dynamic, defaultMappingSource);
  } else if (logger.isDebugEnabled()) {
    logger.debug("using dynamic[{}]", dynamic);
  }
}
public void testDefaultAnalyzers() throws IOException {
    Version version = VersionUtils.randomVersion(getRandom());
    Settings settings =
        Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, version)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    AnalysisService analysisService =
        new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);
    // With no overrides, all three defaults fall back to the standard analyzer.
    assertThat(
        analysisService.defaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertThat(
        analysisService.defaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertThat(
        analysisService.defaultSearchQuoteAnalyzer().analyzer(),
        instanceOf(StandardAnalyzer.class));
}
public void testOverrideDefaultAnalyzer() throws IOException {
    Version version = VersionUtils.randomVersion(getRandom());
    Settings settings =
        Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
    // Registering an analyzer under the name "default" should replace all three defaults.
    AnalysisService analysisService =
        new AnalysisService(
            IndexSettingsModule.newIndexSettings("index", settings),
            Collections.singletonMap("default", analyzerProvider("default")),
            Collections.emptyMap(),
            Collections.emptyMap(),
            Collections.emptyMap());
    assertThat(analysisService.defaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
    assertThat(
        analysisService.defaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
    assertThat(
        analysisService.defaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
}
public void testDefaultsCompoundAnalysis() throws Exception {
    String json = "/org/elasticsearch/index/analysis/stop.json";
    Settings settings =
        Settings.builder()
            .loadFromStream(json, getClass().getResourceAsStream(json))
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    AnalysisService analysisService = createAnalysisService(idxSettings, settings);
    // Both analyzers configured in stop.json remove every word of this input,
    // so no tokens survive.
    for (String analyzerName : new String[] {"analyzer1", "analyzer2"}) {
        NamedAnalyzer analyzer = analysisService.analyzer(analyzerName);
        assertTokenStreamContents(analyzer.tokenStream("test", "to be or not to be"), new String[0]);
    }
}
/**
 * Builds the per-index mapper service and loads the default mapping source.
 *
 * <p>Resolution order for the default mapping: an explicitly configured
 * "default_mapping_location" (config dir, then as a plain file path), otherwise the
 * "default-mapping.json" in the config dir, otherwise the built-in resource bundled
 * on the classpath.
 */
@Inject
public MapperService(
    Index index,
    @IndexSettings Settings indexSettings,
    Environment environment,
    AnalysisService analysisService) {
  super(index, indexSettings);
  this.analysisService = analysisService;
  this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService);
  this.searchAnalyzer =
      new SmartIndexNameSearchAnalyzer(analysisService.defaultSearchAnalyzer());
  // Dynamic mapping is on unless explicitly disabled in the component settings.
  this.dynamic = componentSettings.getAsBoolean("dynamic", true);
  String defaultMappingLocation = componentSettings.get("default_mapping_location");
  URL defaultMappingUrl;
  if (defaultMappingLocation == null) {
    try {
      defaultMappingUrl = environment.resolveConfig("default-mapping.json");
    } catch (FailedToResolveConfigException e) {
      // not there, default to the built in one
      defaultMappingUrl =
          indexSettings
              .getClassLoader()
              .getResource("org/elasticsearch/index/mapper/default-mapping.json");
    }
  } else {
    try {
      defaultMappingUrl = environment.resolveConfig(defaultMappingLocation);
    } catch (FailedToResolveConfigException e) {
      // not there, default to the built in one
      // Fall back to interpreting the location as a file-system path.
      try {
        defaultMappingUrl = new File(defaultMappingLocation).toURI().toURL();
      } catch (MalformedURLException e1) {
        throw new FailedToResolveConfigException(
            "Failed to resolve dynamic mapping location [" + defaultMappingLocation + "]");
      }
    }
  }
  // Read the resolved mapping into memory; failure to load it is fatal for the index.
  try {
    defaultMappingSource =
        Streams.copyToString(
            new InputStreamReader(defaultMappingUrl.openStream(), Charsets.UTF_8));
  } catch (IOException e) {
    throw new MapperException(
        "Failed to load default mapping source from [" + defaultMappingLocation + "]", e);
  }
  logger.debug(
      "using dynamic[{}], default mapping: default_mapping_location[{}], loaded_from[{}] and source[{}]",
      dynamic,
      defaultMappingLocation,
      defaultMappingUrl,
      defaultMappingSource);
}
public void testReadingFormFilterFactory() throws IOException {
    AnalysisService analysisService = createAnalysisService();
    String source = "今夜はロバート先生と話した";

    // "kuromoji_rf" is configured to emit romaji readings.
    TokenFilterFactory romajiFilter = analysisService.tokenFilter("kuromoji_rf");
    assertThat(romajiFilter, instanceOf(KuromojiReadingFormFilterFactory.class));
    Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader(source));
    String[] romajiTokens = {"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"};
    assertSimpleTSOutput(romajiFilter.create(tokenizer), romajiTokens);

    // The default "kuromoji_readingform" emits katakana readings.
    TokenFilterFactory katakanaFilter = analysisService.tokenFilter("kuromoji_readingform");
    assertThat(katakanaFilter, instanceOf(KuromojiReadingFormFilterFactory.class));
    tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader(source));
    String[] katakanaTokens = {"コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ"};
    assertSimpleTSOutput(katakanaFilter.create(tokenizer), katakanaTokens);
}
/** Pre-built analyzers must be cached: two services built from the same registry share instances. */
public void testBuiltInAnalyzersAreCached() throws IOException {
    Settings settings =
        Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    Settings indexSettings =
        settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    AnalysisService analysisService =
        new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);
    // Renamed from the misspelled "otherAnalysisSergice".
    AnalysisService otherAnalysisService =
        new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);
    final int numIters = randomIntBetween(5, 20);
    for (int i = 0; i < numIters; i++) {
        PreBuiltAnalyzers preBuiltAnalyzers =
            RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values());
        // Same instance, not just equal: the analyzer must come from the shared cache.
        assertSame(
            analysisService.analyzer(preBuiltAnalyzers.name()),
            otherAnalysisService.analyzer(preBuiltAnalyzers.name()));
    }
}
public void testHtmlStripCharFilter() throws Exception { Settings settings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); AnalysisService analysisService = new AnalysisRegistry(null, new Environment(settings)).build(idxSettings); NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter"); assertTokenStreamContents( analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"}); // Repeat one more time to make sure that char filter is reinitialized correctly assertTokenStreamContents( analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"}); }
@Test public void testMappingCharFilter() throws Exception { Index index = new Index("test"); Settings settings = settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.char_filter.my_mapping.type", "mapping") .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q") .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping") .put("path.home", createTempDir().toString()) .build(); Injector parentInjector = new ModulesBuilder() .add( new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()) .createInjector(); Injector injector = new ModulesBuilder() .add( new IndexSettingsModule(index, settings), new IndexNameModule(index), new AnalysisModule( settings, parentInjector.getInstance(IndicesAnalysisService.class))) .createChildInjector(parentInjector); AnalysisService analysisService = injector.getInstance(AnalysisService.class); NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter"); assertTokenStreamContents( analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"}); // Repeat one more time to make sure that char filter is reinitialized correctly assertTokenStreamContents( analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"}); }