@Before
 public void setup() {
   // Token pattern: each match is a run of ASCII digits, a run of ASCII letters,
   // a run of characters from the Katakana block, or a single '#' or '_'.
   Pattern tokenPattern = Pattern.compile("([0-9]+|[a-zA-Z]+|\\p{InKatakana}+|#|_)");

   // Wrap a regex-driven extractor in the hashtag combiner and keep it as the
   // stream used by this fixture.
   stream =
       new HashtagTokenCombiner(
           new RegexExtractor.Builder().setRegexPattern(tokenPattern).build());

   // Fetch the term and token-type attribute handles from the stream once, up front.
   termAttr = stream.getAttribute(CharSequenceTermAttribute.class);
   typeAttr = stream.getAttribute(TokenTypeAttribute.class);
 }