@Test public void testHanOnly() throws IOException { AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(RESOURCE); TokenFilterFactory tokenFilter = analysisService.tokenFilter("cjk_han_only"); String source = "多くの学生が試験に落ちた。"; String[] expected = new String[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"}; Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); }
public void testCaseInsensitiveMapping() throws IOException { AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE); TokenFilterFactory tokenFilter = analysisService.tokenFilter("my_keep_filter"); assertThat(tokenFilter, instanceOf(KeepWordFilterFactory.class)); String source = "hello small world"; String[] expected = new String[] {"hello", "world"}; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected, new int[] {1, 2}); }
public void testFillerToken() throws IOException { AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE); TokenFilterFactory tokenFilter = analysisService.tokenFilter("shingle_filler"); String source = "simon the sorcerer"; String[] expected = new String[] {"simon FILLER", "simon FILLER sorcerer", "FILLER sorcerer"}; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); TokenStream stream = new StopFilter(tokenizer, StopFilter.makeStopSet("the")); assertTokenStreamContents(tokenFilter.create(stream), expected); }
public void testInverseMappingNoShingles() throws IOException { AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE); TokenFilterFactory tokenFilter = analysisService.tokenFilter("shingle_inverse"); assertThat(tokenFilter, instanceOf(ShingleTokenFilterFactory.class)); String source = "the quick"; String[] expected = new String[] {"the", "quick"}; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); }
public void testDefault() throws IOException { AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE); TokenFilterFactory tokenFilter = analysisService.tokenFilter("shingle"); String source = "the quick brown fox"; String[] expected = new String[] {"the", "the quick", "quick", "quick brown", "brown", "brown fox", "fox"}; Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(source)); assertTokenStreamContents(tokenFilter.create(tokenizer), expected); }
public void testLoadWithoutSettings() throws IOException { AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromClassPath(createTempDir(), RESOURCE); TokenFilterFactory tokenFilter = analysisService.tokenFilter("keep"); Assert.assertNull(tokenFilter); }