Example #1
0
 /**
  * Consumes the next morpheme and returns its surface form as a token,
  * keeping special characters.
  *
  * <p>The {@code sc} flag is set if the morpheme is a mark or an attached
  * word. Otherwise, the flag is cleared and the position counter is
  * advanced by one.</p>
  *
  * @return token built from the surface form of the consumed morpheme
  */
 private byte[] getSC() {
   final Morpheme morpheme = tokens.next();
   final String surface = morpheme.getSurface();
   // marks and attached words do not advance the token position
   sc = morpheme.isMark() || morpheme.isAttachedWord();
   if (!sc) pos++;
   return token(surface);
 }
Example #2
0
 /**
  * Returns the normalized form of the current token and advances the
  * position counter.
  *
  * <p>Processing steps, in order:</p>
  * <ol>
  *   <li>the surface form is chosen, or the base form if {@code st} is set
  *     and the part of speech is {@code HINSHI_DOUSHI} or
  *     {@code HINSHI_KEIYOUSHI} (presumably verbs and adjectives);</li>
  *   <li>diacritics are removed if the token is not pure ASCII and
  *     {@code dc} is not set;</li>
  *   <li>the token is converted to upper case for {@code FTCase.UPPER},
  *     left untouched for {@code FTCase.SENSITIVE}, and converted to lower
  *     case in all other cases;</li>
  *   <li>the result is passed through {@code toHankaku}.</li>
  * </ol>
  *
  * @return token
  */
 private byte[] get() {
   pos++;
   final String surface = currToken.getSurface();
   final int partOfSpeech = currToken.getHinshi();
   final boolean inflectable = partOfSpeech == Morpheme.HINSHI_DOUSHI
       || partOfSpeech == Morpheme.HINSHI_KEIYOUSHI;
   final String text = st && inflectable ? currToken.getBaseForm() : surface;

   byte[] result = token(text);
   // the ASCII check is done once, before any normalization
   final boolean isAscii = ascii(result);
   if (!(isAscii || dc)) result = noDiacritics(result);
   if (cs != FTCase.SENSITIVE) {
     result = cs == FTCase.UPPER
         ? WesternTokenizer.upper(result, isAscii)
         : WesternTokenizer.lower(result, isAscii);
   }
   return toHankaku(result);
 }