/**
 * Runs NLPIR word segmentation over every line of a UTF-8 text file and appends the
 * output to {@code ./test/OutTest.txt}.
 *
 * <p>The output starts with the number of lines in {@code vodFile}; after that, for
 * each input line, a CRLF followed by the filtered segmentation result is appended.
 * If NLPIR initialization fails, a message is printed and nothing is written.
 *
 * <p>Both readers are closed via try-with-resources, and {@code NLPIR_Exit} is called
 * in a {@code finally} block so the library is released even when processing fails.
 *
 * @param vodFile path of the UTF-8 encoded input file, read line by line
 * @throws Exception if reading the input, segmenting, or appending to the output fails
 */
static void splitWords(String vodFile) throws Exception {
  String outFile = "./test/OutTest.txt"; // output
  NLPIR nlpir = new NLPIR();
  if (!NLPIR.NLPIR_Init("./file/".getBytes("utf-8"), 1)) { // 1:utf-8  0:gbk
    System.out.println("NLPIR initialization failed ...");
    return;
  }
  try {
    // First pass: count the lines, because the count must be written before any result.
    int line = 0;
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(vodFile), "UTF-8"))) {
      while (reader.readLine() != null) {
        line++;
      }
    }
    appendFile(String.valueOf(line), outFile, "UTF-8"); // lines

    // Second pass: segment each line and append it in the "\r\n" + result format.
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(vodFile), "UTF-8"))) {
      String temp;
      while ((temp = reader.readLine()) != null) {
        // NOTE(review): the flag 1 presumably enables part-of-speech tagging - confirm
        // against the NLPIR API documentation.
        byte[] resBytes = nlpir.NLPIR_ParagraphProcess(temp.getBytes("UTF-8"), 1);
        String result = contentFilter(new String(resBytes, "UTF-8"));
        appendFile("\r\n", outFile, "UTF-8"); // Write to file with specific format
        appendFile(result, outFile, "UTF-8");
      }
    }
  } finally {
    // Release NLPIR on every path that follows a successful init.
    NLPIR.NLPIR_Exit();
  }
}
示例#2
0
 /**
  * Segments each line of a UTF-8 text file with NLPIR and appends the output to
  * {@code ./test/OutTest.txt}.
  *
  * <p>The line count of {@code vodFile} is appended first. Then, for every input line,
  * a CRLF and the segmentation result (after {@code contentFilter}) are appended.
  * When NLPIR fails to initialize, a message is printed and the method returns without
  * writing anything.
  *
  * <p>Readers are closed via try-with-resources and {@code NLPIR_Exit} runs in a
  * {@code finally} block, so neither file handles nor the NLPIR library are left open
  * when an exception interrupts processing.
  *
  * @param vodFile path of the UTF-8 encoded input file, read line by line
  * @throws Exception if reading the input, segmenting, or appending to the output fails
  */
 static void splitWords(String vodFile) throws Exception {
   String outFile = "./test/OutTest.txt"; // output
   // Create the interface instance.
   NLPIR nlpir = new NLPIR();
   // Second argument of NLPIR_Init: 0 means GBK encoding, 1 means UTF-8
   // (the original author noted this conclusion is not authoritative).
   if (!NLPIR.NLPIR_Init("./file/".getBytes("utf-8"), 1)) {
     System.out.println("NLPIR初始化失败...");
     return;
   }
   try {
     // Count the lines up front: the count is the first thing written to the output.
     int line = 0;
     try (BufferedReader counter =
         new BufferedReader(new InputStreamReader(new FileInputStream(vodFile), "UTF-8"))) {
       while (counter.readLine() != null) {
         line++;
       }
     }
     appendFile(String.valueOf(line), outFile, "UTF-8"); // write the line count

     // Read one line at a time until readLine() returns null at end of file.
     try (BufferedReader reader =
         new BufferedReader(new InputStreamReader(new FileInputStream(vodFile), "UTF-8"))) {
       String temp;
       while ((temp = reader.readLine()) != null) {
         // Segment the line; 1 requests part-of-speech tags (per the original comment).
         byte[] resBytes = nlpir.NLPIR_ParagraphProcess(temp.getBytes("UTF-8"), 1);
         // Post-process the segmentation result.
         String result = contentFilter(new String(resBytes, "UTF-8"));
         appendFile("\r\n", outFile, "UTF-8"); // write in the expected format
         appendFile(result, outFile, "UTF-8");
       }
     }
   } finally {
     // Exit and release NLPIR resources, including when processing failed.
     NLPIR.NLPIR_Exit();
   }
 }