/** * 新词熵及其左右熵 * * @param all */ public static double leftRightEntropy(List<Term> all) { // TODO Auto-generated method stub double score = 0; NewWordNatureAttr newWordAttr = null; Term first = all.get(0); // 查看左右链接 int twoWordFreq = TwoWordLibrary.getTwoWordFreq(first.getFrom(), first); score -= twoWordFreq; // 查看右连接 int length = all.size() - 1; Term end = all.get(all.size() - 1); twoWordFreq = TwoWordLibrary.getTwoWordFreq(end, end.getTo()); score -= twoWordFreq; // 查看内部链接 for (int i = 0; i < length; i++) { score -= TwoWordLibrary.getTwoWordFreq(all.get(i), all.get(i + 1)); } if (score < -3) { return 0; } // 首字分数 newWordAttr = first.getTermNatures().newWordAttr; score += getTermScore(newWordAttr, newWordAttr.getB()); // 末字分数 newWordAttr = end.getTermNatures().newWordAttr; score += getTermScore(newWordAttr, newWordAttr.getE()); // 中词分数 double midelScore = 0; Term term = null; for (int i = 1; i < length; i++) { term = all.get(i); newWordAttr = term.getTermNatures().newWordAttr; midelScore += getTermScore(newWordAttr, newWordAttr.getM()); } score += midelScore / (length); return score; }
/**
 * Computes the score of moving from one term to the next.
 *
 * <p>The result is the score already stored on {@code from} plus the negative
 * logarithm of a smoothed mix of {@code from}'s own frequency and the
 * two-word frequency of the pair ({@code from}, {@code to}).
 *
 * @param from the preceding term
 * @param to   the following term
 * @return the score of {@code from} plus the cost of the transition
 */
public static double compuScore(Term from, Term to) {
    final double fromFreq = from.getTermNatures().allFreq + 1;
    if (fromFreq < 0) {
        // Negative frequency: fall back to a fixed cost.
        return from.getScore() + MAX_FREQUENCE;
    }

    final int pairFreq = TwoWordLibrary.getTwoWordFreq(from, to);

    // Smoothed single-term component and pair component of the mix.
    final double singleTermPart = dSmoothingPara * fromFreq / (MAX_FREQUENCE + 80000);
    final double pairPart = (1 - dSmoothingPara) * ((1 - dTemp) * pairFreq / fromFreq + dTemp);
    double cost = -Math.log(singleTermPart + pairPart);

    // A negative cost is lifted by the term frequency, at most twice.
    for (int pass = 0; pass < 2 && cost < 0; pass++) {
        cost += fromFreq;
    }
    return from.getScore() + cost;
}