コード例 #1
0
ファイル: MathUtil.java プロジェクト: 0x376h/ansj_seg
  /**
   * Computes the accumulated score for stepping from one term to the term that follows it.
   *
   * <p>If {@code from.termNatures().allFreq + 1} is negative, {@code MAX_FREQUENCE} is added to
   * the current score of {@code from}; that sum is stored back on {@code from} and returned.
   * Otherwise the result is the current score of {@code from} plus the negative natural log of
   * a value that blends the frequency of {@code from} with the two-word frequency returned by
   * {@code NgramLibrary.getTwoWordFreq(from, to)}, weighted by {@code dSmoothingPara} and
   * {@code dTemp}. A negative log value is shifted up by the frequency before it is added.
   *
   * @param from the preceding term
   * @param to the following term
   * @return the score of {@code from} combined with the transition value towards {@code to}
   */
  public static double compuScore(Term from, Term to) {
    final double fromFreq = from.termNatures().allFreq + 1;

    // Negative frequency: bump the source term's own score and stop here.
    if (fromFreq < 0) {
      final double bumped = from.score() + MAX_FREQUENCE;
      from.score(bumped);
      return bumped;
    }

    final int pairFreq = NgramLibrary.getTwoWordFreq(from, to);

    // The two summands of the interpolated value, kept in their original evaluation order.
    final double singlePart = dSmoothingPara * fromFreq / (MAX_FREQUENCE + 80000);
    final double pairPart = (1 - dSmoothingPara) * ((1 - dTemp) * pairFreq / fromFreq + dTemp);
    final double logValue = -Math.log(singlePart + pairPart);

    // A negative value is shifted up by the frequency before being accumulated.
    final double transition = logValue < 0 ? logValue + fromFreq : logValue;
    return from.score() + transition;
  }
コード例 #2
0
 /**
  * Scans {@code terms} for entries whose person attribute is flagged and collects the terms
  * that {@code nameFind} returns for them.
  *
  * <p>For each flagged term the {@code score} and {@code selfScore} fields are reset to zero,
  * then the indices 2, 1 and 0 are tried in that order. An index is attempted only when
  * {@code personAttr.getFreq(index, 0)} is greater than 10. When {@code nameFind} yields a
  * term it is added to the result; if the {@code skip} field is set, the scores of every
  * non-null term from the current position up to (but excluding) the found term's
  * {@code getToValue()} are reset and the outer scan continues from that position.
  *
  * @return the terms produced by {@code nameFind}, in the order they were found; never
  *     {@code null}
  */
 private List<Term> recogntion_() {
   List<Term> termList = new ArrayList<Term>();
   int beginFreq = 10;
   for (int i = 0; i < terms.length; i++) {
     Term term = terms[i];
     if (term == null || !term.getTermNatures().personAttr.flag) {
       continue;
     }
     term.score = 0;
     term.selfScore = 0;
     for (int j = 2; j > -1; j--) {
       int freq = term.getTermNatures().personAttr.getFreq(j, 0);
       // NOTE(review): one threshold applies regardless of name length; confirm whether
       // two-character names were meant to use a different one.
       if (freq <= 10) {
         continue;
       }
       Term tempTerm = nameFind(i, beginFreq, j);
       if (tempTerm == null) {
         continue;
       }
       termList.add(tempTerm);
       // If this is an undisputed recognition
       if (skip) {
         int end = tempTerm.getToValue();
         for (int j2 = i; j2 < end; j2++) {
           if (terms[j2] != null) {
             terms[j2].score = 0;
             terms[j2].selfScore = 0;
           }
         }
         // The outer loop's i++ moves the scan to the position right after the found term.
         i = end - 1;
         break;
       }
     }
     // Uses the term picked at the top of this iteration, even if i was moved above.
     beginFreq = term.getTermNatures().personAttr.begin + 1;
   }
   return termList;
 }