示例#1
0
  @Override
  public JapaneseTokenizer init(final byte[] txt) {
    String source = string(txt);
    if (wc) { // convert wide-space to space
      source = source.replace('\u3000', '\u0020');
    }
    final ArrayList<?> morpheme = (ArrayList<?>) Reflect.invoke(parse, tagger, source);
    final ArrayList<Morpheme> list = new ArrayList<>();
    try {
      int prev = 0;
      final int ms = morpheme.size();
      for (int i = 0; i < ms; i++) {
        final Object m = morpheme.get(i);
        final String srfc = surface.get(m).toString();
        final String ftr = feature.get(m).toString();
        final int strt = start.getInt(m);
        if (i != 0) {
          final int l = strt - prev;
          if (l != 0) {
            list.add(new Morpheme(source.substring(strt - 1, strt + l - 1), KIGOU_FEATURE));
          }
        }
        prev = srfc.length() + strt;

        // separates continuous mark (ASCII)
        boolean cont = true;
        final ArrayList<Morpheme> marks = new ArrayList<>();
        final int sl = srfc.length();
        for (int s = 0; s < sl; s++) {
          final String c = String.valueOf(srfc.charAt(s));
          final byte[] t = token(c);
          if (t.length == 1) {
            if (letter(t[0]) || digit(t[0])) cont = false;
            else marks.add(new Morpheme(c, KIGOU_FEATURE));
          } else {
            cont = false;
          }
        }

        if (cont) list.addAll(marks);
        else list.add(new Morpheme(srfc, ftr));
      }
    } catch (final Exception ex) {
      Util.errln(Util.className(this) + ": " + ex);
    }
    tokenList = list;
    tokens = list.iterator();

    return this;
  }
示例#2
0
 /**
  * Creates any missing folders in the specified path.
  *
  * @param path the path to construct
  */
 public static void createFolders(String path) {
   // work way up path to find existing folder
   File dir = new File(path);
   ArrayList dirs = new ArrayList(); // list of needed directories
   while (!dir.exists()) {
     dirs.add(0, dir);
     int j = path.lastIndexOf("/"); // $NON-NLS-1$
     if (j == -1) {
       break;
     }
     path = path.substring(0, j);
     dir = new File(path);
   }
   // work back down path and create needed directories
   Iterator it = dirs.iterator();
   while (it.hasNext()) {
     dir = (File) it.next();
     dir.mkdir();
   }
 }