static void splitWords(String vodFile) throws Exception { String outFile = "./test/OutTest.txt"; // output NLPIR nlpir = new NLPIR(); if (!NLPIR.NLPIR_Init("./file/".getBytes("utf-8"), 1)) { // 1:utf-8 0:gbk System.out.println("NLPIR initialization failed ..."); return; } BufferedReader reader = null; reader = new BufferedReader(new InputStreamReader(new FileInputStream(vodFile), "UTF-8")); String temp = null; String result = null; String strline = null; int line = 0; while ((temp = reader.readLine()) != null) { line++; } strline = String.valueOf(line); appendFile(strline, outFile, "UTF-8"); // lines temp = null; reader = new BufferedReader( new InputStreamReader(new FileInputStream(vodFile), "UTF-8")); // read lines while ((temp = reader.readLine()) != null) { byte[] resBytes = nlpir.NLPIR_ParagraphProcess( temp.getBytes("UTF-8"), 1); // Processing the results of words spliting result = contentFilter(new String(resBytes, "UTF-8")); // UTF-8 appendFile("\r\n", outFile, "UTF-8"); // Write to file with specific format appendFile(result, outFile, "UTF-8"); } NLPIR.NLPIR_Exit(); }
static void splitWords(String vodFile) throws Exception { // 创建接口实例 String outFile = "./test/OutTest.txt"; // output NLPIR nlpir = new NLPIR(); // NLPIR_Init方法第二个参数设置0表示编码为GBK, 1表示UTF8编码(此处结论不够权威) if (!NLPIR.NLPIR_Init("./file/".getBytes("utf-8"), 1)) { System.out.println("NLPIR初始化失败..."); return; } BufferedReader reader = null; reader = new BufferedReader(new InputStreamReader(new FileInputStream(vodFile), "UTF-8")); String temp = null; String result = null; String strline = null; int line = 0; while ((temp = reader.readLine()) != null) { line++; } strline = String.valueOf(line); appendFile(strline, outFile, "UTF-8"); // 写入行数 temp = null; reader = new BufferedReader( new InputStreamReader(new FileInputStream(vodFile), "UTF-8")); // 一次读入一行,直到读入null为文件结束 while ((temp = reader.readLine()) != null) { byte[] resBytes = nlpir.NLPIR_ParagraphProcess(temp.getBytes("UTF-8"), 1); // 分词,1为有词性 result = contentFilter(new String(resBytes, "UTF-8")); // 处理分词结果 appendFile("\r\n", outFile, "UTF-8"); // 按格式写入文件 appendFile(result, outFile, "UTF-8"); // System.out.println(result); // contentFilter(temp) } // System.out.println(line); // System.out.println("end"); // 退出, 释放资源 NLPIR.NLPIR_Exit(); }