public void testConfigureCamelCaseTokenFilter() throws IOException {
    Settings settings =
        Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    Settings indexSettings =
        Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
            .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
            .putArray(
                "index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
            .put("index.analysis.analyzer.custom_analyzer_1.tokenizer", "whitespace")
            .putArray(
                "index.analysis.analyzer.custom_analyzer_1.filter", "lowercase", "word_delimiter")
            .build();

    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

    IndexAnalyzers indexAnalyzers =
        new AnalysisModule(new Environment(settings), emptyList())
            .getAnalysisRegistry()
            .build(idxSettings);
    try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer")) {
      assertNotNull(custom_analyser);
      TokenStream tokenStream = custom_analyser.tokenStream("foo", "J2SE j2ee");
      tokenStream.reset();
      CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
      List<String> token = new ArrayList<>();
      while (tokenStream.incrementToken()) {
        token.add(charTermAttribute.toString());
      }
      assertEquals(token.toString(), 2, token.size());
      assertEquals("j2se", token.get(0));
      assertEquals("j2ee", token.get(1));
    }

    try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer_1")) {
      assertNotNull(custom_analyser);
      TokenStream tokenStream = custom_analyser.tokenStream("foo", "J2SE j2ee");
      tokenStream.reset();
      CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
      List<String> token = new ArrayList<>();
      while (tokenStream.incrementToken()) {
        token.add(charTermAttribute.toString());
      }
      assertEquals(token.toString(), 6, token.size());
      assertEquals("j", token.get(0));
      assertEquals("2", token.get(1));
      assertEquals("se", token.get(2));
      assertEquals("j", token.get(3));
      assertEquals("2", token.get(4));
      assertEquals("ee", token.get(5));
    }
  }
Ejemplo n.º 2
0
  public void testDefaultsCompoundAnalysis() throws Exception {
    String json = "/org/elasticsearch/index/analysis/stop.json";
    Settings settings =
        Settings.builder()
            .loadFromStream(json, getClass().getResourceAsStream(json))
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    AnalysisService analysisService = createAnalysisService(idxSettings, settings);

    NamedAnalyzer analyzer1 = analysisService.analyzer("analyzer1");

    assertTokenStreamContents(analyzer1.tokenStream("test", "to be or not to be"), new String[0]);

    NamedAnalyzer analyzer2 = analysisService.analyzer("analyzer2");

    assertTokenStreamContents(analyzer2.tokenStream("test", "to be or not to be"), new String[0]);
  }
Ejemplo n.º 3
0
  public void testHtmlStripCharFilter() throws Exception {
    Settings settings =
        settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
            .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "html_strip")
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisService analysisService =
        new AnalysisRegistry(null, new Environment(settings)).build(idxSettings);

    NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter");

    assertTokenStreamContents(
        analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"});

    // Repeat one more time to make sure that char filter is reinitialized correctly
    assertTokenStreamContents(
        analyzer1.tokenStream("test", "<b>hello</b>!"), new String[] {"hello"});
  }
Ejemplo n.º 4
0
  @Test
  public void testMappingCharFilter() throws Exception {
    Index index = new Index("test");
    Settings settings =
        settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.char_filter.my_mapping.type", "mapping")
            .putArray("index.analysis.char_filter.my_mapping.mappings", "ph=>f", "qu=>q")
            .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
            .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "my_mapping")
            .put("path.home", createTempDir().toString())
            .build();
    Injector parentInjector =
        new ModulesBuilder()
            .add(
                new SettingsModule(settings),
                new EnvironmentModule(new Environment(settings)),
                new IndicesAnalysisModule())
            .createInjector();
    Injector injector =
        new ModulesBuilder()
            .add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(
                    settings, parentInjector.getInstance(IndicesAnalysisService.class)))
            .createChildInjector(parentInjector);

    AnalysisService analysisService = injector.getInstance(AnalysisService.class);

    NamedAnalyzer analyzer1 = analysisService.analyzer("custom_with_char_filter");

    assertTokenStreamContents(
        analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"});

    // Repeat one more time to make sure that char filter is reinitialized correctly
    assertTokenStreamContents(
        analyzer1.tokenStream("test", "jeff quit phish"), new String[] {"jeff", "qit", "fish"});
  }