/**
 * Fetches the next morpheme from the token iterator and returns its surface
 * string as a token. The {@code sc} flag is set when the morpheme is a mark
 * or an attached word; otherwise the flag is cleared and the position
 * counter is advanced.
 *
 * @return token
 */
private byte[] getSC() {
  final Morpheme morpheme = tokens.next();
  final String surface = morpheme.getSurface();
  // marks and attached words are flagged and do not advance the position
  sc = morpheme.isMark() || morpheme.isAttachedWord();
  if (!sc) {
    pos++;
  }
  return token(surface);
}
/**
 * Advances the position counter and returns the normalized token for the
 * current morpheme. The surface string is used unless {@code st} is set and
 * the morpheme's hinshi is {@code HINSHI_DOUSHI} or {@code HINSHI_KEIYOUSHI},
 * in which case the base form is taken instead. Diacritics and case options
 * are then applied, and the result is passed through {@code toHankaku}.
 *
 * @return token
 */
private byte[] get() {
  pos++;
  final String surface = currToken.getSurface();
  final int kind = currToken.getHinshi();
  // NOTE(review): DOUSHI / KEIYOUSHI presumably denote verbs / adjectives - confirm
  final boolean hasBaseForm = kind == Morpheme.HINSHI_DOUSHI
      || kind == Morpheme.HINSHI_KEIYOUSHI;
  final String text = st && hasBaseForm ? currToken.getBaseForm() : surface;

  byte[] result = token(text);
  final boolean isAscii = ascii(result);
  // diacritics are only removed from non-ASCII tokens, and only if dc is unset
  if (!(isAscii || dc)) {
    result = noDiacritics(result);
  }

  // any case setting other than UPPER and SENSITIVE is lower-cased
  if (cs == FTCase.UPPER) {
    result = WesternTokenizer.upper(result, isAscii);
  } else if (cs != FTCase.SENSITIVE) {
    result = WesternTokenizer.lower(result, isAscii);
  }
  return toHankaku(result);
}