public void testLUCENE_3042() throws Exception {
  String testString = "t";

  Analyzer analyzer = new MockAnalyzer(random());
  try (TokenStream stream = analyzer.tokenStream("dummy", testString)) {
    stream.reset();
    while (stream.incrementToken()) {
      // consume the stream
    }
    stream.end();
  }

  // the analyzer must still be usable after the stream above was fully consumed
  assertAnalyzesTo(analyzer, testString, new String[] {"t"});
}
/** Test a configuration where three characters make a term. */
public void testThreeChars() throws Exception {
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar", new String[] {"foo", "bar"}, new int[] {0, 3}, new int[] {3, 6});
  // make sure end() reports the correct final offset when the last term is a "partial" match
  assertTokenStreamContents(
      a.tokenStream("bogus", "fooba"),
      new String[] {"foo"},
      new int[] {0},
      new int[] {3},
      new int[] {1},
      5);
  checkRandomData(random(), a, 100);
}
/** Test MockTokenizer encountering a too-long token. */
public void testTooLongToken() throws Exception {
  Analyzer whitespace =
      new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          // limit tokens to 5 characters; longer runs are split into multiple tokens
          Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false, 5);
          return new TokenStreamComponents(t, t);
        }
      };

  assertTokenStreamContents(
      whitespace.tokenStream("bogus", "test 123 toolong ok "),
      new String[] {"test", "123", "toolo", "ng", "ok"},
      new int[] {0, 5, 9, 14, 17},
      new int[] {4, 8, 14, 16, 19},
      20);

  assertTokenStreamContents(
      whitespace.tokenStream("bogus", "test 123 toolo"),
      new String[] {"test", "123", "toolo"},
      new int[] {0, 5, 9},
      new int[] {4, 8, 14},
      14);
}
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<>();
  try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytesRef = termAttribute.getBytesRef();
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      // the attribute reuses the same BytesRef instance, so fill it for the current token
      // and deep-copy it before adding it to the result list
      termAttribute.fillBytesRef();
      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
    }
    tokenStream.end();
  }
  return bytesRefs;
}
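// Hedged usage sketch (not part of the original class): one way the analyze(...) helper above
// could be exercised. The WhitespaceAnalyzer, the field name "content", the test method name,
// and the expected terms are illustrative assumptions, not taken from the original code.
public void testAnalyzeHelperSketch() throws IOException {
  Analyzer analyzer = new WhitespaceAnalyzer(); // org.apache.lucene.analysis.core.WhitespaceAnalyzer
  List<BytesRef> terms = analyze("foo bar", "content", analyzer);
  // whitespace tokenization of "foo bar" should yield the two terms "foo" and "bar"
  assertEquals(2, terms.size());
  assertEquals(new BytesRef("foo"), terms.get(0));
  assertEquals(new BytesRef("bar"), terms.get(1));
}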