/** * 数字+数字合并,zheng * * @param terms */ public static void recognition(Term[] terms) { int length = terms.length - 1; Term from = null; Term to = null; Term temp = null; for (int i = 0; i < length; i++) { if (terms[i] == null) { continue; } else if (".".equals(terms[i].getName())) { // 如果是.前后都为数字进行特殊处理 to = terms[i].getTo(); from = terms[i].getFrom(); if (from.getTermNatures().numAttr.flag && to.getTermNatures().numAttr.flag) { from.setName(from.getName() + "." + to.getName()); TermUtil.termLink(from, to.getTo()); terms[to.getOffe()] = null; terms[i] = null; i = from.getOffe() - 1; } continue; } else if (!terms[i].getTermNatures().numAttr.flag) { continue; } temp = terms[i]; // 将所有的数字合并 while ((temp = temp.getTo()).getTermNatures().numAttr.flag) { terms[i].setName(terms[i].getName() + temp.getName()); } // 如果是数字结尾 if (temp.getTermNatures().numAttr.numEndFreq > 0) { terms[i].setName(terms[i].getName() + temp.getName()); temp = temp.getTo(); } // 如果不等,说明terms[i]发生了改变 if (terms[i].getTo() != temp) { TermUtil.termLink(terms[i], temp); // 将中间无用元素设置为null for (int j = i + 1; j < temp.getOffe(); j++) { terms[j] = null; } i = temp.getOffe() - 1; } } }
/** * 新词熵及其左右熵 * * @param all */ public static double leftRightEntropy(List<Term> all) { // TODO Auto-generated method stub double score = 0; NewWordNatureAttr newWordAttr = null; Term first = all.get(0); // 查看左右链接 int twoWordFreq = TwoWordLibrary.getTwoWordFreq(first.getFrom(), first); score -= twoWordFreq; // 查看右连接 int length = all.size() - 1; Term end = all.get(all.size() - 1); twoWordFreq = TwoWordLibrary.getTwoWordFreq(end, end.getTo()); score -= twoWordFreq; // 查看内部链接 for (int i = 0; i < length; i++) { score -= TwoWordLibrary.getTwoWordFreq(all.get(i), all.get(i + 1)); } if (score < -3) { return 0; } // 首字分数 newWordAttr = first.getTermNatures().newWordAttr; score += getTermScore(newWordAttr, newWordAttr.getB()); // 末字分数 newWordAttr = end.getTermNatures().newWordAttr; score += getTermScore(newWordAttr, newWordAttr.getE()); // 中词分数 double midelScore = 0; Term term = null; for (int i = 1; i < length; i++) { term = all.get(i); newWordAttr = term.getTermNatures().newWordAttr; midelScore += getTermScore(newWordAttr, newWordAttr.getM()); } score += midelScore / (length); return score; }
public void recognition() { if (branch == null) { return; } int length = terms.length - 1; Term term = null; for (int i = 0; i < length; i++) { if (terms[i] == null) { continue; } else { from = terms[i].getFrom(); terms[i].score = 0; terms[i].selfScore = 0; } branch = branch.getBranch(terms[i].getName()); if (branch == null || branch.getStatus() == 3) { reset(); continue; } offe = i; // 循环查找添加 term = terms[i]; sb.append(term.getName()); if (branch.getStatus() == 2) { term.selfScore = branch.getParam().getScore(); } boolean flag = true; while (flag) { term = term.getTo(); branch = branch.getBranch(term.getName()); // 如果没有找到跳出 if (branch == null) { break; } switch (branch.getStatus()) { case 1: sb.append(term.getName()); continue; case 2: sb.append(term.getName()); score = branch.getParam().getScore(); tempNature = branch.getParam().getNature(); to = term.getTo(); makeNewTerm(); continue; case 3: sb.append(term.getName()); score = branch.getParam().getScore(); tempNature = branch.getParam().getNature(); to = term.getTo(); makeNewTerm(); flag = false; break; default: System.out.println("怎么能出现0呢?"); break; } } reset(); } }