Example #1
0
  /**
   * Scores a candidate new word (a run of consecutive terms) from its left/right and internal
   * two-word link frequencies plus the per-position new-word scores of its terms.
   *
   * <p>The score starts as the negated sum of the {@code TwoWordLibrary} frequencies for: the link
   * from the first term's predecessor to the first term, the link from the last term to its
   * successor, and every adjacent pair inside the list. If that value is below -3 the candidate is
   * rejected and 0 is returned. Otherwise the first term's score (using {@code getB()}), the last
   * term's score (using {@code getE()}) and the averaged score of the terms in between (using
   * {@code getM()}) are added.
   *
   * @param all the terms forming the candidate, in order; may be {@code null} or empty
   * @return the computed score, or 0 when {@code all} is {@code null} or empty or when the link
   *     penalty falls below -3
   */
  public static double leftRightEntropy(List<Term> all) {
    // Nothing to score: report the same value as a rejected candidate instead of throwing.
    if (all == null || all.isEmpty()) {
      return 0;
    }

    // Index of the last term; also the number of adjacent pairs inside the list.
    int length = all.size() - 1;
    Term first = all.get(0);
    Term end = all.get(length);

    double score = 0;

    // Left link: the term preceding the candidate -> first term.
    score -= TwoWordLibrary.getTwoWordFreq(first.getFrom(), first);

    // Right link: last term -> the term following the candidate.
    score -= TwoWordLibrary.getTwoWordFreq(end, end.getTo());

    // Internal links between adjacent terms of the candidate.
    for (int i = 0; i < length; i++) {
      score -= TwoWordLibrary.getTwoWordFreq(all.get(i), all.get(i + 1));
    }
    if (score < -3) {
      return 0;
    }

    // Score of the first term.
    NewWordNatureAttr newWordAttr = first.getTermNatures().newWordAttr;
    score += getTermScore(newWordAttr, newWordAttr.getB());

    // Score of the last term.
    newWordAttr = end.getTermNatures().newWordAttr;
    score += getTermScore(newWordAttr, newWordAttr.getE());

    // Score of the middle terms. Skipped for a single-term list, where the divisor would be 0
    // and 0.0 / 0 would turn the whole score into NaN.
    if (length > 0) {
      double middleScore = 0;
      for (int i = 1; i < length; i++) {
        newWordAttr = all.get(i).getTermNatures().newWordAttr;
        middleScore += getTermScore(newWordAttr, newWordAttr.getM());
      }
      // NOTE(review): the divisor is size - 1 although only size - 2 terms are summed; kept
      // as-is so existing scores do not shift -- confirm whether this is intentional.
      score += middleScore / length;
    }
    return score;
  }
Example #2
0
  /**
   * Computes the score of moving from one term to the next and adds it to the score already
   * accumulated on {@code from}.
   *
   * <p>When {@code from}'s total frequency plus one is negative, the result is simply
   * {@code from.getScore() + MAX_FREQUENCE}. Otherwise the transition cost is the negative log of
   * a smoothed combination of the term's own frequency and the two-word frequency of the pair
   * {@code (from, to)} as reported by {@code TwoWordLibrary}.
   *
   * @param from the preceding term
   * @param to the following term
   * @return {@code from.getScore()} plus the transition cost
   */
  public static double compuScore(Term from, Term to) {
    final double frequency = from.getTermNatures().allFreq + 1;
    if (frequency < 0) {
      return from.getScore() + MAX_FREQUENCE;
    }

    final int pairFreq = TwoWordLibrary.getTwoWordFreq(from, to);

    // Smoothed single-term part and two-word part of the value passed to the logarithm.
    final double singleTermPart = dSmoothingPara * frequency / (MAX_FREQUENCE + 80000);
    final double pairPart =
        (1 - dSmoothingPara) * ((1 - dTemp) * pairFreq / frequency + dTemp);

    double cost = -Math.log(singleTermPart + pairPart);

    // A negative cost is shifted up by the frequency; the shift is applied at most twice.
    for (int pass = 0; pass < 2 && cost < 0; pass++) {
      cost += frequency;
    }

    return from.getScore() + cost;
  }