public static void main(String[] args) { String argu = "F:\\Code\\EclipseWorkSpace\\workspace\\nlpir20140928"; // String system_charset = "GBK";//GBK----0 String system_charset = "UTF-8"; int charset_type = 1; int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0"); String nativeBytes = null; if (0 == init_flag) { nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg(); System.err.println("初始化失败!fail reason is " + nativeBytes); return; } String source = "F:\\ExpData\\DataIntegate\\source\\nne\\Weibos\\oneWeiboOneLine.txt"; String dest = "F:\\ExpData\\DataIntegate\\source\\nne\\Weibos\\oneWeiboOneLineFc.txt"; // String nativeBytes = null; try { WeiboParser weiboParser = new WeiboParser(); weiboParser.parse(source, dest, CLibrary.Instance); System.out.println("hello"); CLibrary.Instance.NLPIR_Exit(); } catch (Exception ex) { // TODO Auto-generated catch block ex.printStackTrace(); } }
public static void main(String[] args) throws Exception { String argu = ""; // String system_charset = "GBK";//GBK----0 String system_charset = "GBK"; int charset_type = 1; // int charset_type = 0; // 调用printf打印信息 int init_flag = CLibrary.Instance.NLPIR_Init( argu.getBytes(system_charset), charset_type, "0".getBytes(system_charset)); if (0 == init_flag) { System.err.println("初始化失败!"); return; } String sInput = "据悉,质检总局已将最新有关情况再次通报美方,要求美方加强对输华玉米的产地来源、运输及仓储等环节的管控措施,有效避免输华玉米被未经我国农业部安全评估并批准的转基因品系污染。"; String nativeBytes = null; try { nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sInput, 3); // String nativeStr = new String(nativeBytes, 0, // nativeBytes.length,"utf-8"); System.out.println("分词结果为: " + nativeBytes); // System.out.println("分词结果为: " // + transString(nativeBytes, system_charset, "UTF-8")); // // System.out.println("分词结果为: " // + transString(nativeBytes, "gb2312", "utf-8")); int nCountKey = 0; String nativeByte = CLibrary.Instance.NLPIR_GetKeyWords(sInput, 10, false); System.out.print("关键词提取结果是:" + nativeByte); // int nativeElementSize = 4 * 6 +8;//size of result_t in native // code // int nElement = nativeByte.length / nativeElementSize; // ByteArrayInputStream(nativeByte)); // // nativeByte = new byte[nativeByte.length]; // nCountKey = testNLPIR30.NLPIR_KeyWord(nativeByte, nElement); // // Result[] resultArr = new Result[nCountKey]; // DataInputStream dis = new DataInputStream(new // ByteArrayInputStream(nativeByte)); // for (int i = 0; i < nCountKey; i++) // { // resultArr[i] = new Result(); // resultArr[i].start = Integer.reverseBytes(dis.readInt()); // resultArr[i].length = Integer.reverseBytes(dis.readInt()); // dis.skipBytes(8); // resultArr[i].posId = Integer.reverseBytes(dis.readInt()); // resultArr[i].wordId = Integer.reverseBytes(dis.readInt()); // resultArr[i].word_type = Integer.reverseBytes(dis.readInt()); // resultArr[i].weight = Integer.reverseBytes(dis.readInt()); // } // dis.close(); // // for (int i = 0; i < resultArr.length; i++) // { // System.out.println("start=" + resultArr[i].start + ",length=" + // resultArr[i].length + "pos=" + resultArr[i].posId + "word=" + // resultArr[i].wordId + " weight=" + resultArr[i].weight); // } CLibrary.Instance.NLPIR_Exit(); } catch (Exception ex) { // TODO Auto-generated catch block ex.printStackTrace(); } }
public static void standardMain(String[] args) throws Exception { String argu = "F:\\Code\\EclipseWorkSpace\\workspace\\nlpir20140928"; // String system_charset = "GBK";//GBK----0 String system_charset = "UTF-8"; int charset_type = 1; int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0"); String nativeBytes = null; if (0 == init_flag) { nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg(); System.err.println("初始化失败!fail reason is " + nativeBytes); return; } String sInput = "据悉,质检总局已将最新有关情况再次通报美方,要求美方加强对输华玉米的产地来源、运输及仓储等环节的管控措施,有效避免输华玉米被未经我国农业部安全评估并批准的转基因品系污染。"; // String nativeBytes = null; try { nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sInput, 0); System.out.println("分词结果为: " + nativeBytes); CLibrary.Instance.NLPIR_AddUserWord("要求美方加强对输 n"); CLibrary.Instance.NLPIR_AddUserWord("华玉米的产地来源 n"); nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sInput, 1); System.out.println("增加用户词典后分词结果为: " + nativeBytes); CLibrary.Instance.NLPIR_DelUsrWord("要求美方加强对输"); nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sInput, 1); System.out.println("删除用户词典后分词结果为: " + nativeBytes); int nCountKey = 0; String nativeByte = CLibrary.Instance.NLPIR_GetKeyWords(sInput, 10, false); System.out.print("关键词提取结果是:" + nativeByte); nativeByte = CLibrary.Instance.NLPIR_GetFileKeyWords( "D:\\NLPIR\\feedback\\huawei\\5341\\5341\\产经广场\\2012\\5\\16766.txt", 10, false); System.out.print("关键词提取结果是:" + nativeByte); CLibrary.Instance.NLPIR_Exit(); } catch (Exception ex) { // TODO Auto-generated catch block ex.printStackTrace(); } }