@Override public JapaneseTokenizer init(final byte[] txt) { String source = string(txt); if (wc) { // convert wide-space to space source = source.replace('\u3000', '\u0020'); } final ArrayList<?> morpheme = (ArrayList<?>) Reflect.invoke(parse, tagger, source); final ArrayList<Morpheme> list = new ArrayList<>(); try { int prev = 0; final int ms = morpheme.size(); for (int i = 0; i < ms; i++) { final Object m = morpheme.get(i); final String srfc = surface.get(m).toString(); final String ftr = feature.get(m).toString(); final int strt = start.getInt(m); if (i != 0) { final int l = strt - prev; if (l != 0) { list.add(new Morpheme(source.substring(strt - 1, strt + l - 1), KIGOU_FEATURE)); } } prev = srfc.length() + strt; // separates continuous mark (ASCII) boolean cont = true; final ArrayList<Morpheme> marks = new ArrayList<>(); final int sl = srfc.length(); for (int s = 0; s < sl; s++) { final String c = String.valueOf(srfc.charAt(s)); final byte[] t = token(c); if (t.length == 1) { if (letter(t[0]) || digit(t[0])) cont = false; else marks.add(new Morpheme(c, KIGOU_FEATURE)); } else { cont = false; } } if (cont) list.addAll(marks); else list.add(new Morpheme(srfc, ftr)); } } catch (final Exception ex) { Util.errln(Util.className(this) + ": " + ex); } tokenList = list; tokens = list.iterator(); return this; }
static { IOFile dic = null; if (Reflect.available(PATTERN)) { dic = new IOFile(LANG); if (!dic.exists()) { dic = new IOFile(Prop.HOME, "etc/" + LANG); if (!dic.exists()) { available = false; } } } else { available = false; } if (available) { Class<?> clz = Reflect.find(PATTERN); if (clz == null) { Util.debug("Could not initialize Igo Japanese lexer."); } else { /* Igo constructor. */ final Constructor<?> tgr = Reflect.find(clz, String.class); tagger = Reflect.get(tgr, dic.path()); if (tagger == null) { available = false; Util.debug("Could not initialize Igo Japanese lexer."); } else { parse = Reflect.method(clz, "parse", CharSequence.class); if (parse == null) { Util.debug("Could not initialize Igo lexer method."); } clz = Reflect.find("net.reduls.igo.Morpheme"); surface = Reflect.field(clz, "surface"); feature = Reflect.field(clz, "feature"); start = Reflect.field(clz, "start"); } } } }