public void testKatakanaStemFilter() throws IOException {
    // Resolve the stemmer registered under the plugin's default name.
    AnalysisService analysisService = createAnalysisService();
    TokenFilterFactory stemmerFilter = analysisService.tokenFilter("kuromoji_stemmer");
    assertThat(stemmerFilter, instanceOf(KuromojiKatakanaStemmerFactory.class));

    String input = "明後日パーティーに行く予定がある。図書館で資料をコピーしました。";

    Tokenizer firstTokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    firstTokenizer.setReader(new StringReader(input));

    // With the default minimum length, パーティー is stemmed to パーティ while the
    // shorter コピー is left untouched.
    String[] stemmedTokens =
        new String[] {
          "明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"
        };
    assertSimpleTSOutput(stemmerFilter.create(firstTokenizer), stemmedTokens);

    // "kuromoji_ks" is configured with minimum length 6, so neither katakana
    // word is stemmed.
    stemmerFilter = analysisService.tokenFilter("kuromoji_ks");
    assertThat(stemmerFilter, instanceOf(KuromojiKatakanaStemmerFactory.class));
    Tokenizer secondTokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    secondTokenizer.setReader(new StringReader(input));

    String[] unstemmedTokens =
        new String[] {
          "明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"
        };
    assertSimpleTSOutput(stemmerFilter.create(secondTokenizer), unstemmedTokens);
  }
  public void testIterationMarkCharFilter() throws IOException {
    AnalysisService analysisService = createAnalysisService();
    String input = "ところゞゝゝ、ジヾが、時々、馬鹿々々しい";

    // Kanji-only normalization: only the 々 iteration marks are expanded.
    CharFilterFactory factory = analysisService.charFilter("kuromoji_im_only_kanji");
    assertNotNull(factory);
    assertThat(factory, instanceOf(KuromojiIterationMarkCharFilterFactory.class));
    assertCharFilterEquals(
        factory.create(new StringReader(input)), "ところゞゝゝ、ジヾが、時時、馬鹿馬鹿しい");

    // Kana-only normalization: only the ゞ/ヾ iteration marks are expanded.
    factory = analysisService.charFilter("kuromoji_im_only_kana");
    assertNotNull(factory);
    assertThat(factory, instanceOf(KuromojiIterationMarkCharFilterFactory.class));
    assertCharFilterEquals(
        factory.create(new StringReader(input)), "ところどころ、ジジが、時々、馬鹿々々しい");

    // Default configuration: both kana and kanji iteration marks are expanded.
    factory = analysisService.charFilter("kuromoji_im_default");
    assertNotNull(factory);
    assertThat(factory, instanceOf(KuromojiIterationMarkCharFilterFactory.class));
    assertCharFilterEquals(
        factory.create(new StringReader(input)), "ところどころ、ジジが、時時、馬鹿馬鹿しい");
  }
  public void testDefaultsKuromojiAnalysis() throws IOException {
    // Each kuromoji component registered by the plugin must resolve to its
    // dedicated factory type under its canonical name.
    AnalysisService analysisService = createAnalysisService();

    assertThat(
        analysisService.tokenizer("kuromoji_tokenizer"),
        instanceOf(KuromojiTokenizerFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_part_of_speech"),
        instanceOf(KuromojiPartOfSpeechFilterFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_readingform"),
        instanceOf(KuromojiReadingFormFilterFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_baseform"),
        instanceOf(KuromojiBaseFormFilterFactory.class));
    assertThat(
        analysisService.tokenFilter("kuromoji_stemmer"),
        instanceOf(KuromojiKatakanaStemmerFactory.class));
    assertThat(
        analysisService.tokenFilter("ja_stop"), instanceOf(JapaneseStopTokenFilterFactory.class));

    // The prebuilt "kuromoji" analyzer is the stock Lucene JapaneseAnalyzer.
    assertThat(
        analysisService.analyzer("kuromoji").analyzer(), instanceOf(JapaneseAnalyzer.class));

    // "my_analyzer" is a custom analyzer whose tokenizer is the Japanese one.
    NamedAnalyzer customAnalyzer = analysisService.analyzer("my_analyzer");
    assertThat(customAnalyzer.analyzer(), instanceOf(CustomAnalyzer.class));
    assertThat(
        customAnalyzer.analyzer().tokenStream(null, new StringReader("")),
        instanceOf(JapaneseTokenizer.class));

    assertThat(
        analysisService.charFilter("kuromoji_iteration_mark"),
        instanceOf(KuromojiIterationMarkCharFilterFactory.class));
  }
 public void testBackCompatOverrideDefaultIndexAndSearchAnalyzer() {
   // Pick any index version created before 3.0.0, where "default_index" and
   // "default_search" could still override the defaults independently.
   Version preV3 =
       VersionUtils.randomVersionBetween(
           getRandom(),
           VersionUtils.getFirstVersion(),
           VersionUtils.getPreviousVersion(Version.V_3_0_0));
   Settings indexSettings =
       Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, preV3).build();

   Map<String, AnalyzerProvider> analyzerProviders = new HashMap<>();
   analyzerProviders.put("default_index", analyzerProvider("default_index"));
   analyzerProviders.put("default_search", analyzerProvider("default_search"));

   AnalysisService service =
       new AnalysisService(
           IndexSettingsModule.newIndexSettings("index", indexSettings),
           analyzerProviders,
           Collections.emptyMap(),
           Collections.emptyMap(),
           Collections.emptyMap());

   // All three defaults must reflect the overrides (English analyzer).
   assertThat(service.defaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
   assertThat(service.defaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
   assertThat(
       service.defaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
 }
  @Test
  public void testArirangAnalysis() {
    // Verifies that registering the ArirangAnalysisBinderProcessor exposes the
    // "arirang_tokenizer" under its expected factory type.
    Index index = new Index("test");
    // Use the JVM temp dir rather than a developer-specific absolute path
    // ("/Users/hwjeong/temp/tmp") so the test runs on any machine.
    Settings settings =
        settingsBuilder()
            .put("path.home", System.getProperty("java.io.tmpdir"))
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    // Parent injector provides environment-level bindings; the child injector
    // wires the analysis module with the arirang processor added.
    Injector parentInjector =
        new ModulesBuilder()
            .add(
                new SettingsModule(EMPTY_SETTINGS),
                new EnvironmentModule(new Environment(settings)))
            .createInjector();
    Injector injector =
        new ModulesBuilder()
            .add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(
                        EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class))
                    .addProcessor(new ArirangAnalysisBinderProcessor()))
            .createChildInjector(parentInjector);

    AnalysisService analysisService = injector.getInstance(AnalysisService.class);

    TokenizerFactory tokenizerFactory = analysisService.tokenizer("arirang_tokenizer");
    MatcherAssert.assertThat(tokenizerFactory, instanceOf(ArirangTokenizerFactory.class));
  }
 @Test
 public void testDefault() throws IOException {
   // "limit_default" carries no options, so it must behave exactly like the
   // built-in "limit" filter: keep only the first token.
   Settings settings =
       ImmutableSettings.settingsBuilder()
           .put("index.analysis.filter.limit_default.type", "limit")
           .build();
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
   for (String filterName : new String[] {"limit_default", "limit"}) {
     TokenFilterFactory tokenFilter = analysisService.tokenFilter(filterName);
     Tokenizer tokenizer = new WhitespaceTokenizer();
     tokenizer.setReader(new StringReader("the quick brown fox"));
     assertTokenStreamContents(tokenFilter.create(tokenizer), new String[] {"the"});
   }
 }
  @Inject
  public MapperService(
      Index index,
      @IndexSettings Settings indexSettings,
      AnalysisService analysisService,
      SimilarityLookupService similarityLookupService,
      ScriptService scriptService) {
    super(index, indexSettings);
    this.analysisService = analysisService;
    this.fieldTypes = new FieldTypeLookup();
    // Parser used to turn mapping JSON into DocumentMapper instances.
    this.documentParser =
        new DocumentMapperParser(
            indexSettings, this, analysisService, similarityLookupService, scriptService);
    // Wrap the service-level defaults so per-field analyzers can be extracted
    // via the corresponding extractor at analysis time.
    this.indexAnalyzer =
        new MapperAnalyzerWrapper(analysisService.defaultIndexAnalyzer(), INDEX_ANALYZER_EXTRACTOR);
    this.searchAnalyzer =
        new MapperAnalyzerWrapper(
            analysisService.defaultSearchAnalyzer(), SEARCH_ANALYZER_EXTRACTOR);
    this.searchQuoteAnalyzer =
        new MapperAnalyzerWrapper(
            analysisService.defaultSearchQuoteAnalyzer(), SEARCH_QUOTE_ANALYZER_EXTRACTOR);

    // Dynamic mapping is on unless explicitly disabled in the index settings.
    this.dynamic = indexSettings.getAsBoolean("index.mapper.dynamic", true);
    // Percolator default mapping: the "query" object field is stored but not
    // indexed/parsed (enabled: false).
    defaultPercolatorMappingSource =
        "{\n"
            + "\"_default_\":{\n"
            + "\"properties\" : {\n"
            + "\"query\" : {\n"
            + "\"type\" : \"object\",\n"
            + "\"enabled\" : false\n"
            + "}\n"
            + "}\n"
            + "}\n"
            + "}";
    if (index.getName().equals(ScriptService.SCRIPT_INDEX)) {
      // The scripts index disables indexing of the script/template bodies.
      defaultMappingSource =
          "{"
              + "\"_default_\": {"
              + "\"properties\": {"
              + "\"script\": { \"enabled\": false },"
              + "\"template\": { \"enabled\": false }"
              + "}"
              + "}"
              + "}";
    } else {
      defaultMappingSource = "{\"_default_\":{}}";
    }

    if (logger.isTraceEnabled()) {
      logger.trace(
          "using dynamic[{}], default mapping source[{}], default percolator mapping source[{}]",
          dynamic,
          defaultMappingSource,
          defaultPercolatorMappingSource);
    } else if (logger.isDebugEnabled()) {
      logger.debug("using dynamic[{}]", dynamic);
    }
  }
 @Test
 public void testHanOnly() throws IOException {
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE);
   TokenFilterFactory hanOnlyFilter = analysisService.tokenFilter("cjk_han_only");
   // The "cjk_han_only" configuration bigrams kanji runs (学生, 試験) while kana
   // characters stay as single tokens.
   String text = "多くの学生が試験に落ちた。";
   Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
   String[] expectedTokens = new String[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"};
   assertTokenStreamContents(hanOnlyFilter.create(tokenizer), expectedTokens);
 }
 public void testBaseFormFilterFactory() throws IOException {
   AnalysisService analysisService = createAnalysisService();
   // "kuromoji_pos" removes tokens by part-of-speech; the trailing verb 超える
   // (and the final period) are filtered out here.
   TokenFilterFactory posFilter = analysisService.tokenFilter("kuromoji_pos");
   assertThat(posFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class));
   String text = "私は制限スピードを超える。";
   Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
   tokenizer.setReader(new StringReader(text));
   String[] expectedTokens = new String[] {"私", "は", "制限", "スピード", "を"};
   assertSimpleTSOutput(posFilter.create(tokenizer), expectedTokens);
 }
 public void testJapaneseStopFilterFactory() throws IOException {
   AnalysisService analysisService = createAnalysisService();
   // "ja_stop" drops Japanese stopwords (は, を, ...) and keeps content words.
   TokenFilterFactory stopFilter = analysisService.tokenFilter("ja_stop");
   assertThat(stopFilter, instanceOf(JapaneseStopTokenFilterFactory.class));
   String text = "私は制限スピードを超える。";
   Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
   tokenizer.setReader(new StringReader(text));
   String[] expectedTokens = new String[] {"私", "制限", "超える"};
   assertSimpleTSOutput(stopFilter.create(tokenizer), expectedTokens);
 }
  public void testKuromojiUserDict() throws IOException {
    // The user dictionary keeps 制限スピード together as one token instead of
    // letting the tokenizer split it into 制限 + スピード.
    AnalysisService analysisService = createAnalysisService();
    TokenizerFactory userDictTokenizerFactory = analysisService.tokenizer("kuromoji_user_dict");

    Tokenizer tokenizer = userDictTokenizerFactory.create();
    tokenizer.setReader(new StringReader("私は制限スピードを超える。"));
    String[] expectedTokens = new String[] {"私", "は", "制限スピード", "を", "超える"};
    assertSimpleTSOutput(tokenizer, expectedTokens);
  }
 public void testFillerToken() throws IOException {
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
   // "shingle_filler" replaces stop-worded positions with a FILLER token, so
   // shingles spanning the removed "the" contain the filler text.
   TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle_filler");
   Tokenizer tokenizer = new WhitespaceTokenizer();
   tokenizer.setReader(new StringReader("simon the sorcerer"));
   TokenStream withoutStopwords = new StopFilter(tokenizer, StopFilter.makeStopSet("the"));
   String[] expectedShingles =
       new String[] {"simon FILLER", "simon FILLER sorcerer", "FILLER sorcerer"};
   assertTokenStreamContents(shingleFilter.create(withoutStopwords), expectedShingles);
 }
 public void testDefault() throws IOException {
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
   // Default shingle settings emit unigrams plus bigrams.
   TokenFilterFactory shingleFilter = analysisService.tokenFilter("shingle");
   Tokenizer tokenizer = new WhitespaceTokenizer();
   tokenizer.setReader(new StringReader("the quick brown fox"));
   String[] expectedShingles =
       new String[] {"the", "the quick", "quick", "quick brown", "brown", "brown fox", "fox"};
   assertTokenStreamContents(shingleFilter.create(tokenizer), expectedShingles);
 }
 public void testInverseMappingNoShingles() throws IOException {
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
   // "shingle_inverse" requires shingles longer than the input; only the
   // original unigrams come through.
   TokenFilterFactory inverseShingleFilter = analysisService.tokenFilter("shingle_inverse");
   assertThat(inverseShingleFilter, instanceOf(ShingleTokenFilterFactory.class));
   Tokenizer tokenizer = new WhitespaceTokenizer();
   tokenizer.setReader(new StringReader("the quick"));
   String[] expectedTokens = new String[] {"the", "quick"};
   assertTokenStreamContents(inverseShingleFilter.create(tokenizer), expectedTokens);
 }
  public void testConfigureCamelCaseTokenFilter() throws IOException {
    // Verifies that a custom "wordDelimiter" filter with split_on_numerics=false
    // keeps tokens like "j2se" intact, while an analyzer referencing the
    // built-in "word_delimiter" still splits on letter/digit boundaries.
    Settings settings =
        Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    Settings indexSettings =
        settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
            .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
            .putArray(
                "index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
            .put("index.analysis.analyzer.custom_analyzer_1.tokenizer", "whitespace")
            .putArray(
                "index.analysis.analyzer.custom_analyzer_1.filter", "lowercase", "word_delimiter")
            .build();

    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    AnalysisService analysisService =
        new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);
    try (NamedAnalyzer custom_analyser = analysisService.analyzer("custom_analyzer")) {
      assertNotNull(custom_analyser);
      List<String> token = collectTerms(custom_analyser, "J2SE j2ee");
      assertEquals(token.toString(), 2, token.size());
      assertEquals("j2se", token.get(0));
      assertEquals("j2ee", token.get(1));
    }

    try (NamedAnalyzer custom_analyser = analysisService.analyzer("custom_analyzer_1")) {
      assertNotNull(custom_analyser);
      List<String> token = collectTerms(custom_analyser, "J2SE j2ee");
      assertEquals(token.toString(), 6, token.size());
      assertEquals("j", token.get(0));
      assertEquals("2", token.get(1));
      assertEquals("se", token.get(2));
      assertEquals("j", token.get(3));
      assertEquals("2", token.get(4));
      assertEquals("ee", token.get(5));
    }
  }

  /**
   * Runs {@code text} through {@code analyzer} and returns the emitted terms.
   *
   * <p>Follows the full Lucene consumer workflow (reset/incrementToken/end/close);
   * the original inline loops never called end() or close() on the stream.
   */
  private static List<String> collectTerms(NamedAnalyzer analyzer, String text)
      throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream tokenStream = analyzer.tokenStream("foo", text)) {
      CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        terms.add(charTermAttribute.toString());
      }
      tokenStream.end();
    }
    return terms;
  }
 public void testCaseInsensitiveMapping() throws IOException {
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE);
   // "my_keep_filter" retains only words from its keep list; "small" is dropped
   // and the position increments (1, 2) reflect the gap it leaves.
   TokenFilterFactory keepFilter = analysisService.tokenFilter("my_keep_filter");
   assertThat(keepFilter, instanceOf(KeepWordFilterFactory.class));
   Tokenizer tokenizer = new WhitespaceTokenizer();
   tokenizer.setReader(new StringReader("hello small world"));
   String[] expectedTokens = new String[] {"hello", "world"};
   assertTokenStreamContents(keepFilter.create(tokenizer), expectedTokens, new int[] {1, 2});
 }
 @Test
 public void testBasicUsage() throws Exception {
   // The sortform analyzer strips non-sorting markup (<<...>>) from the input.
   Settings analyzerSettings =
       ImmutableSettings.settingsBuilder()
           .put(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT)
           .put("index.analysis.analyzer.myanalyzer.type", "sortform")
           .put("index.analysis.analyzer.myanalyzer.filter", "sortform")
           .build();
   NamedAnalyzer sortformAnalyzer =
       createAnalysisService(analyzerSettings).analyzer("myanalyzer");
   assertAnalyzesTo(
       sortformAnalyzer, "<<Der>> Titel des Buches", new String[] {"Titel des Buches"});
 }
 public void testDefault() throws IOException {
   // Default asciifolding maps "ü" to "u" and emits only the folded form.
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromSettings(
           settingsBuilder()
               .put("path.home", createTempDir().toString())
               .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
               .build());
   TokenFilterFactory foldingFilter = analysisService.tokenFilter("my_ascii_folding");
   Tokenizer tokenizer = new WhitespaceTokenizer();
   tokenizer.setReader(new StringReader("Ansprüche"));
   String[] expectedTokens = new String[] {"Anspruche"};
   assertTokenStreamContents(foldingFilter.create(tokenizer), expectedTokens);
 }
 @Test
 public void testUnicodeUsage() throws Exception {
   // The sortform analyzer also honors the Unicode control characters
   // U+0098 (START OF STRING) and U+009C (STRING TERMINATOR) as non-sort markup.
   Settings analyzerSettings =
       ImmutableSettings.settingsBuilder()
           .put(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT)
           .put("index.analysis.analyzer.myanalyzer.type", "sortform")
           .put("index.analysis.analyzer.myanalyzer.filter", "sortform")
           .build();
   Analyzer sortformAnalyzer = createAnalysisService(analyzerSettings).analyzer("myanalyzer");
   assertAnalyzesTo(
       sortformAnalyzer, "\u0098Der\u009c Titel des Buches", new String[] {"Titel des Buches"});
 }
 public void testPreserveOriginal() throws IOException {
   // With preserve_original=true the filter emits both the folded and the
   // original token at the same position.
   AnalysisService analysisService =
       AnalysisTestsHelper.createAnalysisServiceFromSettings(
           settingsBuilder()
               .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
               .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
               .put("index.analysis.filter.my_ascii_folding.preserve_original", true)
               .build());
   TokenFilterFactory foldingFilter = analysisService.tokenFilter("my_ascii_folding");
   Tokenizer tokenizer = new WhitespaceTokenizer();
   tokenizer.setReader(new StringReader("Ansprüche"));
   String[] expectedTokens = new String[] {"Anspruche", "Ansprüche"};
   assertTokenStreamContents(foldingFilter.create(tokenizer), expectedTokens);
 }
// Example 21
  // Creates and configures the Lucene IndexWriter for this shard's store,
  // releasing any stale write lock first. On IOException the partially
  // constructed writer is closed before the exception is rethrown.
  private IndexWriter createWriter() throws IOException {
    IndexWriter indexWriter = null;
    try {
      // release locks when started
      if (IndexWriter.isLocked(store.directory())) {
        logger.warn("shard is locked, releasing lock");
        IndexWriter.unlock(store.directory());
      }
      // Create a fresh index only when none exists in the directory yet.
      boolean create = !IndexReader.indexExists(store.directory());
      indexWriter =
          new IndexWriter(
              store.directory(),
              analysisService.defaultIndexAnalyzer(),
              create,
              deletionPolicy,
              IndexWriter.MaxFieldLength.UNLIMITED);
      // Wire in shard-level merge scheduling/policy, similarity, and the
      // indexing buffer size (in MB) before handing the writer out.
      indexWriter.setMergeScheduler(mergeScheduler.newMergeScheduler());
      indexWriter.setMergePolicy(mergePolicyProvider.newMergePolicy(indexWriter));
      indexWriter.setSimilarity(similarityService.defaultIndexSimilarity());
      indexWriter.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
      indexWriter.setTermIndexInterval(termIndexInterval);
    } catch (IOException e) {
      safeClose(indexWriter);
      throw e;
    }
    return indexWriter;
  }
// Example 22
  public MapperService(
      IndexSettings indexSettings,
      AnalysisService analysisService,
      SimilarityService similarityService,
      MapperRegistry mapperRegistry,
      Supplier<QueryShardContext> queryShardContextSupplier) {
    super(indexSettings);
    this.analysisService = analysisService;
    this.fieldTypes = new FieldTypeLookup();
    // Parser used to turn mapping JSON into DocumentMapper instances.
    this.documentParser =
        new DocumentMapperParser(
            indexSettings,
            this,
            analysisService,
            similarityService,
            mapperRegistry,
            queryShardContextSupplier);
    // Wrap the service-level defaults; per-field analyzers are resolved via
    // the supplied extractor lambdas.
    this.indexAnalyzer =
        new MapperAnalyzerWrapper(analysisService.defaultIndexAnalyzer(), p -> p.indexAnalyzer());
    this.searchAnalyzer =
        new MapperAnalyzerWrapper(analysisService.defaultSearchAnalyzer(), p -> p.searchAnalyzer());
    this.searchQuoteAnalyzer =
        new MapperAnalyzerWrapper(
            analysisService.defaultSearchQuoteAnalyzer(), p -> p.searchQuoteAnalyzer());
    this.mapperRegistry = mapperRegistry;

    this.dynamic = this.indexSettings.getValue(INDEX_MAPPER_DYNAMIC_SETTING);
    if (index().getName().equals(ScriptService.SCRIPT_INDEX)) {
      // The scripts index disables indexing of the script/template bodies.
      defaultMappingSource =
          "{"
              + "\"_default_\": {"
              + "\"properties\": {"
              + "\"script\": { \"enabled\": false },"
              + "\"template\": { \"enabled\": false }"
              + "}"
              + "}"
              + "}";
    } else {
      defaultMappingSource = "{\"_default_\":{}}";
    }

    if (logger.isTraceEnabled()) {
      logger.trace("using dynamic[{}], default mapping source[{}]", dynamic, defaultMappingSource);
    } else if (logger.isDebugEnabled()) {
      logger.debug("using dynamic[{}]", dynamic);
    }
  }
 public void testDefaultAnalyzers() throws IOException {
   // Without any overrides, all three default analyzers fall back to the
   // standard analyzer, regardless of the index creation version.
   Version randomVersion = VersionUtils.randomVersion(getRandom());
   Settings settings =
       Settings.builder()
           .put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion)
           .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
           .build();
   IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
   AnalysisService service =
       new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);
   assertThat(service.defaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
   assertThat(service.defaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
   assertThat(
       service.defaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
 }
 public void testOverrideDefaultAnalyzer() throws IOException {
   // Registering a "default" analyzer provider overrides all three default
   // analyzers (index, search, search-quote) at once.
   Version randomVersion = VersionUtils.randomVersion(getRandom());
   Settings settings =
       Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, randomVersion).build();
   AnalysisService service =
       new AnalysisService(
           IndexSettingsModule.newIndexSettings("index", settings),
           Collections.singletonMap("default", analyzerProvider("default")),
           Collections.emptyMap(),
           Collections.emptyMap(),
           Collections.emptyMap());
   assertThat(service.defaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
   assertThat(service.defaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
   assertThat(
       service.defaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
 }
  public void testDefaultsCompoundAnalysis() throws Exception {
    // Both analyzers configured in stop.json are pure stopword analyzers, so a
    // sentence of only stopwords must produce an empty token stream.
    String json = "/org/elasticsearch/index/analysis/stop.json";
    Settings settings =
        Settings.builder()
            .loadFromStream(json, getClass().getResourceAsStream(json))
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    AnalysisService analysisService = createAnalysisService(idxSettings, settings);

    for (String analyzerName : new String[] {"analyzer1", "analyzer2"}) {
      NamedAnalyzer analyzer = analysisService.analyzer(analyzerName);
      assertTokenStreamContents(analyzer.tokenStream("test", "to be or not to be"), new String[0]);
    }
  }
// Example 26
  @Inject
  public MapperService(
      Index index,
      @IndexSettings Settings indexSettings,
      Environment environment,
      AnalysisService analysisService) {
    super(index, indexSettings);
    this.analysisService = analysisService;
    this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService);
    this.searchAnalyzer = new SmartIndexNameSearchAnalyzer(analysisService.defaultSearchAnalyzer());

    this.dynamic = componentSettings.getAsBoolean("dynamic", true);
    // Resolve the default mapping source, in order of preference:
    //   1. explicit "default_mapping_location" setting (config dir, then as a
    //      plain file path),
    //   2. "default-mapping.json" in the node's config directory,
    //   3. the built-in classpath resource.
    String defaultMappingLocation = componentSettings.get("default_mapping_location");
    URL defaultMappingUrl;
    if (defaultMappingLocation == null) {
      try {
        defaultMappingUrl = environment.resolveConfig("default-mapping.json");
      } catch (FailedToResolveConfigException e) {
        // not there, default to the built in one
        defaultMappingUrl =
            indexSettings
                .getClassLoader()
                .getResource("org/elasticsearch/index/mapper/default-mapping.json");
      }
    } else {
      try {
        defaultMappingUrl = environment.resolveConfig(defaultMappingLocation);
      } catch (FailedToResolveConfigException e) {
        // not there, default to the built in one
        try {
          defaultMappingUrl = new File(defaultMappingLocation).toURI().toURL();
        } catch (MalformedURLException e1) {
          throw new FailedToResolveConfigException(
              "Failed to resolve dynamic mapping location [" + defaultMappingLocation + "]");
        }
      }
    }

    // Load the resolved mapping as a UTF-8 string; a read failure is fatal for
    // this service.
    // NOTE(review): the InputStreamReader is not explicitly closed here —
    // presumably Streams.copyToString closes it; verify.
    try {
      defaultMappingSource =
          Streams.copyToString(
              new InputStreamReader(defaultMappingUrl.openStream(), Charsets.UTF_8));
    } catch (IOException e) {
      throw new MapperException(
          "Failed to load default mapping source from [" + defaultMappingLocation + "]", e);
    }

    logger.debug(
        "using dynamic[{}], default mapping: default_mapping_location[{}], loaded_from[{}] and source[{}]",
        dynamic,
        defaultMappingLocation,
        defaultMappingUrl,
        defaultMappingSource);
  }
  public void testReadingFormFilterFactory() throws IOException {
    AnalysisService analysisService = createAnalysisService();
    String text = "今夜はロバート先生と話した";

    // "kuromoji_rf" is configured for romaji readings.
    TokenFilterFactory readingFilter = analysisService.tokenFilter("kuromoji_rf");
    assertThat(readingFilter, instanceOf(KuromojiReadingFormFilterFactory.class));
    Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader(text));
    String[] romajiTokens =
        new String[] {"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"};
    assertSimpleTSOutput(readingFilter.create(tokenizer), romajiTokens);

    // The default "kuromoji_readingform" emits katakana readings instead.
    readingFilter = analysisService.tokenFilter("kuromoji_readingform");
    assertThat(readingFilter, instanceOf(KuromojiReadingFormFilterFactory.class));
    tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader(text));
    String[] katakanaTokens = new String[] {"コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ"};
    assertSimpleTSOutput(readingFilter.create(tokenizer), katakanaTokens);
  }
 public void testBuiltInAnalyzersAreCached() throws IOException {
   // Two independently-built registries over the same index settings must hand
   // out the very same pre-built analyzer instances.
   Settings nodeSettings =
       Settings.builder()
           .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
           .build();
   Settings indexSettings =
       settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
   IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
   AnalysisService firstService =
       new AnalysisRegistry(null, new Environment(nodeSettings)).build(idxSettings);
   AnalysisService secondService =
       new AnalysisRegistry(null, new Environment(nodeSettings)).build(idxSettings);
   final int rounds = randomIntBetween(5, 20);
   for (int round = 0; round < rounds; round++) {
     PreBuiltAnalyzers choice = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values());
     assertSame(firstService.analyzer(choice.name()), secondService.analyzer(choice.name()));
   }
 }
// Example 29
  public void testHtmlStripCharFilter() throws Exception {
    Settings settings =
        settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
            .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip")
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisService analysisService =
        new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);

    NamedAnalyzer htmlStripAnalyzer = analysisService.analyzer("custom_with_char_filter");

    // Analyze twice: the second pass proves the char filter is reinitialized
    // correctly for each new token stream.
    for (int pass = 0; pass < 2; pass++) {
      assertTokenStreamContents(
          htmlStripAnalyzer.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"});
    }
  }
// Example 30
  @Test
  public void testMappingCharFilter() throws Exception {
    // A "mapping" char filter (ph=>f, qu=>q) rewrites the input before
    // tokenization: "quit phish" becomes "qit fish".
    Index index = new Index("test");
    Settings settings =
        settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.char_filter.my_mapping.type", "mapping")
            .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q")
            .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
            .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping")
            .put("path.home", createTempDir().toString())
            .build();
    // Environment-level bindings live in the parent injector; the child wires
    // the index-scoped analysis module on top.
    Injector parentInjector =
        new ModulesBuilder()
            .add(
                new SettingsModule(settings),
                new EnvironmentModule(new Environment(settings)),
                new IndicesAnalysisModule())
            .createInjector();
    Injector injector =
        new ModulesBuilder()
            .add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(
                    settings, parentInjector.getInstance(IndicesAnalysisService.class)))
            .createChildInjector(parentInjector);

    AnalysisService analysisService = injector.getInstance(AnalysisService.class);
    NamedAnalyzer mappingAnalyzer = analysisService.analyzer("custom_with_char_filter");

    // Analyze twice: the second pass proves the char filter is reinitialized
    // correctly for each new token stream.
    for (int pass = 0; pass < 2; pass++) {
      assertTokenStreamContents(
          mappingAnalyzer.tokenStream("test", "jeff quit phish"),
          new String[] {"jeff", "qit", "fish"});
    }
  }