示例#1
0
 /**
  * Reads a single file, or every entry directly inside a directory, line by line and reports
  * each line to the listener.
  *
  * <p>For each entry the listener's {@code onFileBegin} is called with the entry's simple name;
  * when it returns {@code false} the entry is skipped. Otherwise every line is passed to
  * {@code onWord} and {@code onFileEnd} is called once the entry has been read completely.
  * Directories are not walked recursively. No charset is given to the reader, so the platform
  * default charset is used for decoding.
  *
  * @param fileOrDirectory path of a file, or of a directory whose direct entries are read
  * @param l listener receiving the begin/word/end callbacks
  * @throws IOException if the directory cannot be listed, or a file cannot be opened or read
  */
 public static void readWords(String fileOrDirectory, ReadListener l) throws IOException {
   File file = new File(fileOrDirectory);
   File[] files = new File[] {file};
   if (file.isDirectory()) {
     files = file.listFiles();
     // listFiles() returns null when the directory cannot be read; report that as an I/O
     // failure instead of failing later with a NullPointerException.
     if (files == null) {
       throw new IOException("can not list directory \"" + fileOrDirectory + "\"!");
     }
   }
   for (int i = 0; i < files.length; i++) {
     String name = files[i].getName();
     if (!l.onFileBegin(name)) {
       continue;
     }
     BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(files[i])));
     try {
       String word;
       while ((word = in.readLine()) != null) {
         l.onWord(word);
       }
       l.onFileEnd(name);
     } finally {
       // Always release the file handle, even when reading or a listener callback throws.
       in.close();
     }
   }
 }
 /**
  * Reads a dictionary file, or every file found by walking a directory tree, line by line in
  * the given charset and reports each line to the listener.
  *
  * <p>A location starting with {@code "classpath:"} is looked up through the class loader of
  * {@code FileWordsReader}; any other location is treated as a file system path. When the
  * location is a directory, all files in it and in its subdirectories are read.
  *
  * <p>For each file the listener's {@code onFileBegin} is called with the file's path relative
  * to the base directory (the directory itself, or the parent of a single file), using
  * {@code '/'} as separator; when it returns {@code false} the file is skipped. Otherwise every
  * line is passed to {@code onWord} and {@code onFileEnd} is called with the same name once the
  * file has been read completely. A leading BOM character on the first line is removed before
  * the line is reported.
  *
  * @param fileOrDirectory file system path, or {@code "classpath:"} followed by a resource name
  * @param l listener receiving the begin/word/end callbacks
  * @param charsetName name of the charset used to decode the files
  * @throws FileNotFoundException if the resource or the path does not exist
  * @throws IOException if a directory cannot be listed, or a file cannot be opened or read
  */
 public static void readWords(String fileOrDirectory, ReadListener l, String charsetName)
     throws IOException {
   File file;
   if (fileOrDirectory.startsWith("classpath:")) {
     String name = fileOrDirectory.substring("classpath:".length());
     URL url = FileWordsReader.class.getClassLoader().getResource(name);
     if (url == null) {
       throw new FileNotFoundException("file \"" + name + "\" not found in classpath!");
     }
     file = new File(getUrlPath(url));
   } else {
     file = new File(fileOrDirectory);
     if (!file.exists()) {
       throw new FileNotFoundException("file \"" + fileOrDirectory + "\" not found!");
     }
   }
   // dirs doubles as the work queue of the breadth-first walk; dics collects the files to read.
   ArrayList /*<File>*/ dirs = new ArrayList /*<File>*/();
   LinkedList /*<File>*/ dics = new LinkedList /*<File>*/();
   String dir;
   if (file.isDirectory()) {
     dirs.add(file);
     dir = file.getAbsolutePath();
   } else {
     dics.add(file);
     // getParentFile() is null for a bare relative name such as "words.dic"; resolving the
     // absolute form first guarantees a parent for a regular file.
     dir = file.getAbsoluteFile().getParentFile().getAbsolutePath();
   }
   int index = 0;
   while (index < dirs.size()) {
     File cur = (File) dirs.get(index++);
     File[] files = cur.listFiles();
     // listFiles() returns null when the directory cannot be read; report that as an I/O
     // failure instead of failing with a NullPointerException.
     if (files == null) {
       throw new IOException("can not list directory \"" + cur.getAbsolutePath() + "\"!");
     }
     for (int i = 0; i < files.length; i++) {
       File f = files[i];
       if (f.isDirectory()) {
         dirs.add(f);
       } else {
         dics.add(f);
       }
     }
   }
   // Number of leading characters to strip to get the relative name. A file system root
   // already ends with the separator, so no extra character must be skipped in that case.
   int prefixLength = dir.endsWith(File.separator) ? dir.length() : dir.length() + 1;
   for (Iterator iter = dics.iterator(); iter.hasNext(); ) {
     File f = (File) iter.next();
     String name = f.getAbsolutePath().substring(prefixLength);
     name = name.replace('\\', '/');
     if (!l.onFileBegin(name)) {
       continue;
     }
     BufferedReader in =
         new BufferedReader(new InputStreamReader(new FileInputStream(f), charsetName));
     try {
       String word;
       boolean firstInDic = true;
       while ((word = in.readLine()) != null) {
         if (firstInDic) {
           firstInDic = false;
           // ref:http://www.w3.org/International/questions/qa-utf8-bom
           // ZERO WIDTH NO-BREAK SPACE
           // Notepad writes a BOM at the start of a file saved as Unicode or UTF-8 and uses
           // the presence of that BOM to recognize the file as UTF-8 encoded.
           // The dictionary must not contain this character, so it is stripped here.
           if (word.length() > 0 && CharSet.isBom(word.charAt(0))) {
             word = word.substring(1);
           }
         }
         l.onWord(word);
       }
       l.onFileEnd(name);
     } finally {
       // Always release the file handle, even when reading or a listener callback throws.
       in.close();
     }
   }
 }