예제 #1
0
 /**
  * Creates the token stream under test before each test method runs.
  *
  * <p>The stream is an {@code EmoticonTokenCombiner} wrapping a {@code LatinTokenizer} built
  * with {@code setKeepPunctuation(true)}. The term and token-type attributes are looked up once
  * here so that the tests can read them after every {@code incrementToken()} call.
  */
 @Before
 public void setup() {
   stream =
       new EmoticonTokenCombiner(new LatinTokenizer.Builder().setKeepPunctuation(true).build());
   // Attribute handles are fetched from the combiner itself, i.e. the outermost stream.
   termAttr = stream.getAttribute(CharSequenceTermAttribute.class);
   typeAttr = stream.getAttribute(TokenTypeAttribute.class);
 }
예제 #2
0
 /**
  * Drains {@code stream} and checks every emitted token against the expected term and type.
  *
  * <p>The check fails when the expectation lists differ in length, when the stream ends before
  * all expected tokens were seen, when a term or type differs, or when the stream still yields
  * a token after the last expected one.
  *
  * @param tokens expected term strings, in emission order
  * @param types expected token types, one per entry of {@code tokens}
  */
 private void verify(List<String> tokens, TokenType... types) {
   // Guard against a malformed expectation: without this check surplus types would be silently
   // ignored and missing ones would surface as an ArrayIndexOutOfBoundsException.
   assertEquals(
       "expected terms and expected types must have the same length",
       tokens.size(),
       types.length);
   for (int i = 0; i < tokens.size(); i++) {
     assertTrue("stream ended before token at index " + i, stream.incrementToken());
     assertEquals("term at index " + i, tokens.get(i), termAttr.getTermString());
     assertEquals("type at index " + i, types[i], typeAttr.getType());
   }
   assertFalse("stream produced more tokens than expected", stream.incrementToken());
 }
예제 #3
0
  /**
   * Feeds several inputs through the stream and checks the resulting terms and token types:
   * compact emoticons, an emoticon containing a space, emoticons mixed with sentence
   * punctuation, and a character run that is expected to stay as separate punctuation tokens.
   */
  @Test
  public void test() {
    // Scenario 1: a two-character emoticon at the end of the input.
    ImmutableList<String> smileyTerms = ImmutableList.of("this", "is", "a", "smiley", ":)");
    TokenType[] smileyTypes = {
      TokenType.TOKEN, TokenType.TOKEN, TokenType.TOKEN, TokenType.TOKEN, TokenType.EMOTICON
    };
    stream.reset("this is a smiley :)");
    verify(smileyTerms, smileyTypes);

    // Scenario 2: a three-character emoticon with a nose.
    ImmutableList<String> sadTerms = ImmutableList.of("sad", "smiley", ":-(");
    TokenType[] sadTypes = {TokenType.TOKEN, TokenType.TOKEN, TokenType.EMOTICON};
    stream.reset("sad smiley :-(");
    verify(sadTerms, sadTypes);

    // Scenario 3: the emoticon contains a space and is still expected as a single term.
    ImmutableList<String> spacedTerms = ImmutableList.of("smiley", "with", "space", ": )");
    TokenType[] spacedTypes = {
      TokenType.TOKEN, TokenType.TOKEN, TokenType.TOKEN, TokenType.EMOTICON
    };
    stream.reset("smiley with space : )");
    verify(spacedTerms, spacedTypes);

    // Scenario 4: emoticons after punctuation and mid-sentence, followed by ":((" which is
    // expected to come out as three separate punctuation tokens.
    ImmutableList<String> mixedTerms =
        ImmutableList.of(
            "First", "smiley", ".", ":p", "Second", "smiley", ":D", "False", "smiley", ":", "(",
            "(");
    TokenType[] mixedTypes = {
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.PUNCTUATION,
      TokenType.EMOTICON,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.EMOTICON,
      TokenType.TOKEN,
      TokenType.TOKEN,
      TokenType.PUNCTUATION,
      TokenType.PUNCTUATION,
      TokenType.PUNCTUATION
    };
    stream.reset("First smiley. :p Second smiley :D False smiley :((");
    verify(mixedTerms, mixedTypes);
  }