/**
 * Prepares the fixture before each test.
 *
 * <p>Wraps a {@code LatinTokenizer} built with {@code setKeepPunctuation(true)} in an
 * {@code EmoticonTokenCombiner}, then looks up the term and token-type attributes on the
 * resulting stream so that the assertions can read them after each {@code incrementToken()}.
 */
@Before
public void setup() {
  stream =
      new EmoticonTokenCombiner(
          new LatinTokenizer.Builder()
              .setKeepPunctuation(true)
              .build());

  // Both attributes are fetched from the combiner stream, which is what the tests advance.
  typeAttr = stream.getAttribute(TokenTypeAttribute.class);
  termAttr = stream.getAttribute(CharSequenceTermAttribute.class);
}
/**
 * Advances the stream token by token and checks each one against the expectations.
 *
 * <p>For every expected entry this asserts that the stream yields another token, that the term
 * string equals the expected text, and that the token type equals the expected type. After the
 * last expected entry it asserts that the stream yields no further token.
 *
 * @param tokens expected term strings, in the order the stream should produce them
 * @param types expected token types; must contain exactly one entry per element of
 *     {@code tokens}
 */
private void verify(List<String> tokens, TokenType... types) {
  // Reject a malformed expectation up front: with fewer types than tokens the loop would die
  // with an ArrayIndexOutOfBoundsException, and with more types the extras would be ignored.
  assertEquals(
      "expected tokens and expected types must have the same length",
      tokens.size(),
      types.length);

  for (int i = 0; i < tokens.size(); i++) {
    String expectedTerm = tokens.get(i);
    assertTrue(
        "stream ended before token #" + i + " (\"" + expectedTerm + "\")",
        stream.incrementToken());
    assertEquals("term of token #" + i, expectedTerm, termAttr.getTermString());
    assertEquals("type of token #" + i, types[i], typeAttr.getType());
  }

  // The stream must be exhausted once all expected tokens have been consumed.
  assertFalse("stream produced more tokens than expected", stream.incrementToken());
}
/**
 * Feeds several inputs through the stream and checks the emitted terms and token types.
 *
 * <p>The expectations cover emoticons reported as a single {@code EMOTICON} token
 * ({@code ":)"}, {@code ":-("}, {@code ": )"}, {@code ":p"}, {@code ":D"}) and a trailing
 * {@code ":(("} that is expected to come out as three separate {@code PUNCTUATION} tokens.
 */
@Test
public void test() {
  // Basic smiley at the end of a sentence.
  stream.reset("this is a smiley :)");
  List<String> basicSmiley = ImmutableList.of("this", "is", "a", "smiley", ":)");
  verify(
      basicSmiley,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.EMOTICON);

  // Three-character emoticon with a nose.
  stream.reset("sad smiley :-(");
  List<String> sadSmiley = ImmutableList.of("sad", "smiley", ":-(");
  verify(
      sadSmiley,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.EMOTICON);

  // Emoticon whose two characters are separated by a space.
  stream.reset("smiley with space : )");
  List<String> spacedSmiley = ImmutableList.of("smiley", "with", "space", ": )");
  verify(
      spacedSmiley,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.EMOTICON);

  // Several emoticons in one input, followed by ":((" which is expected as plain punctuation.
  stream.reset("First smiley. :p Second smiley :D False smiley :((");
  List<String> mixed =
      ImmutableList.of(
          "First", "smiley", ".", ":p",
          "Second", "smiley", ":D",
          "False", "smiley", ":", "(", "(");
  verify(
      mixed,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.PUNCTUATION,
      TokenType.EMOTICON,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.EMOTICON,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.PUNCTUATION,
      TokenType.PUNCTUATION,
      TokenType.PUNCTUATION);
}