コード例 #1
0
  /**
   * Segments the file {@code TestBIG.txt} and writes the result to
   * {@code TestBIG_result.txt} using the ICTCLAS2011 native segmenter.
   *
   * <p>The segmenter is initialised from the current directory ({@code "."}) with
   * encoding code {@code 2}. Failures are reported on standard error instead of being
   * silently discarded, and the native component is released whenever initialisation
   * succeeded, even if processing throws.
   */
  public static void SplitBIG5() {
    boolean initialized = false;
    try {
      ICTCLAS2011 testICTCLAS2011 = new ICTCLAS2011();

      String argu = ".";
      // NOTE(review): encoding code 2 is presumably the BIG5 code (the original comment
      // here said "UTF8"); confirm against the ICTCLAS encoding constants.
      if (!ICTCLAS2011.ICTCLAS_Init(argu.getBytes("GB2312"), 2)) {
        System.out.println("Init Fail!");
        return;
      }
      initialized = true;

      String argu1 = "TestBIG.txt";
      String argu2 = "TestBIG_result.txt";
      testICTCLAS2011.ICTCLAS_FileProcess(argu1.getBytes("GB2312"), argu2.getBytes("GB2312"), 1);
    } catch (Exception ex) {
      // This demo entry point stays best-effort (nothing is rethrown), but the failure
      // must be visible to whoever runs it.
      System.err.println("SplitBIG5 failed: " + ex);
      ex.printStackTrace();
    } finally {
      // Release the segmenter's resources only if Init succeeded.
      if (initialized) {
        ICTCLAS2011.ICTCLAS_Exit();
      }
    }
  }
コード例 #2
0
  /**
   * Segments {@code sInput} with the ICTCLAS2011 native segmenter, prints the result,
   * and then segments the file {@code movie.txt} into {@code TestGBK_result.txt}.
   *
   * <p>The segmenter is initialised with encoding code {@code 0} and POS map {@code 2}.
   * Failures are reported on standard error instead of being silently discarded, and
   * the native component is released whenever initialisation succeeded, even if
   * processing throws.
   *
   * @param sInput the text to segment; it is converted to GB2312 bytes before being
   *     handed to the native library
   */
  public static void Split(String sInput) {
    boolean initialized = false;
    try {
      ICTCLAS2011 testICTCLAS2011 = new ICTCLAS2011();

      // NOTE(review): machine-specific absolute path to the segmenter's data directory;
      // this only works on a host that has this exact layout.
      String argu = "F:\\workspace\\wordAutoErrorCorrection\\";
      System.out.println("ICTCLAS_Init");
      if (!ICTCLAS2011.ICTCLAS_Init(argu.getBytes("GB2312"), 0)) {
        System.out.println("Init Fail!");
        return;
      }
      initialized = true;

      /*
       * Select the part-of-speech tag set:
       *   ID   tag set
       *   1    ICT (Institute of Computing Technology) first-level tag set
       *   0    ICT second-level tag set
       *   2    PKU (Peking University) second-level tag set
       *   3    PKU first-level tag set
       */
      testICTCLAS2011.ICTCLAS_SetPOSmap(2);

      // Segmentation before any user dictionary is imported.
      byte[] nativeBytes = testICTCLAS2011.ICTCLAS_ParagraphProcess(sInput.getBytes("GB2312"), 0);
      String nativeStr = new String(nativeBytes, 0, nativeBytes.length, "GB2312");

      System.out.println("未导入用户词典: " + nativeStr);

      // File segmentation.
      String argu1 = "movie.txt";
      String argu2 = "TestGBK_result.txt";
      testICTCLAS2011.ICTCLAS_FileProcess(argu1.getBytes("GB2312"), argu2.getBytes("GB2312"), 0);

      /*
      Disabled reference code: user-dictionary import, dynamic user words, and the
      advanced result/keyword API. Kept as a usage example.

      // Import the user dictionary
      String sUserDict = "userdic.txt";
      int nCount = testICTCLAS2011.ICTCLAS_ImportUserDict(sUserDict.getBytes("GB2312"));
      testICTCLAS2011.ICTCLAS_SaveTheUsrDic(); // save the user dictionary
      System.out.println("导入个用户词: " + nCount);

      nativeBytes = testICTCLAS2011.ICTCLAS_ParagraphProcess(sInput.getBytes("GB2312"), 1);
      nativeStr = new String(nativeBytes, 0, nativeBytes.length, "GB2312");

      System.out.println("导入用户词典后: " + nativeStr);

      // Add a user word dynamically
      String sWordUser = "******";
      testICTCLAS2011.ICTCLAS_AddUserWord(sWordUser.getBytes("GB2312"));
      testICTCLAS2011.ICTCLAS_SaveTheUsrDic(); // save the user dictionary

      nativeBytes = testICTCLAS2011.ICTCLAS_ParagraphProcess(sInput.getBytes("GB2312"), 1);
      nativeStr = new String(nativeBytes, 0, nativeBytes.length, "GB2312");
      System.out.println("动态添加用户词后: " + nativeStr);

      // Advanced segmentation interface
      nativeBytes = testICTCLAS2011.nativeProcAPara(sInput.getBytes("GB2312"));

      int nativeElementSize = testICTCLAS2011.ICTCLAS_GetElemLength(0); // size of result_t in native code
      int nElement = nativeBytes.length / nativeElementSize;

      byte nativeBytesTmp[] = new byte[nativeBytes.length];

      // Keyword extraction
      int nCountKey = testICTCLAS2011.ICTCLAS_KeyWord(nativeBytesTmp, nElement);

      Result[] resultArr = new Result[nCountKey];
      DataInputStream dis = new DataInputStream(new ByteArrayInputStream(nativeBytesTmp));

      int iSkipNum;
      for (int i = 0; i < nCountKey; i++)
      {
        resultArr[i] = new Result();
        resultArr[i].start = Integer.reverseBytes(dis.readInt());
        iSkipNum = testICTCLAS2011.ICTCLAS_GetElemLength(1) - 4;
        if (iSkipNum > 0)
        {
          dis.skipBytes(iSkipNum);
        }

        resultArr[i].length = Integer.reverseBytes(dis.readInt());
        iSkipNum = testICTCLAS2011.ICTCLAS_GetElemLength(2) - 4;
        if (iSkipNum > 0)
        {
          dis.skipBytes(iSkipNum);
        }

        dis.skipBytes(testICTCLAS2011.ICTCLAS_GetElemLength(3));

        resultArr[i].posId = Integer.reverseBytes(dis.readInt());
        iSkipNum = testICTCLAS2011.ICTCLAS_GetElemLength(4) - 4;
        if (iSkipNum > 0)
        {
          dis.skipBytes(iSkipNum);
        }

        resultArr[i].wordId = Integer.reverseBytes(dis.readInt());
        iSkipNum = testICTCLAS2011.ICTCLAS_GetElemLength(5) - 4;
        if (iSkipNum > 0)
        {
          dis.skipBytes(iSkipNum);
        }

        resultArr[i].word_type = Integer.reverseBytes(dis.readInt());
        iSkipNum = testICTCLAS2011.ICTCLAS_GetElemLength(6) - 4;
        if (iSkipNum > 0)
        {
          dis.skipBytes(iSkipNum);
        }
        resultArr[i].weight = Integer.reverseBytes(dis.readInt());
        iSkipNum = testICTCLAS2011.ICTCLAS_GetElemLength(7) - 4;
        if (iSkipNum > 0)
        {
          dis.skipBytes(iSkipNum);
        }

      }

      dis.close();

      for (int i = 0; i < resultArr.length; i++)
      {
        System.out.println("start=" + resultArr[i].start + ",length=" + resultArr[i].length + "pos=" + resultArr[i].posId + "word=" + resultArr[i].wordId + "  weight=" + resultArr[i].weight);
      }*/
    } catch (Exception ex) {
      // This demo entry point stays best-effort (nothing is rethrown), but the failure
      // must be visible to whoever runs it.
      System.err.println("Split failed: " + ex);
      ex.printStackTrace();
    } finally {
      // Release the segmenter's resources only if Init succeeded.
      if (initialized) {
        ICTCLAS2011.ICTCLAS_Exit();
      }
    }
  }