コード例 #1
0
  /**
   * Merges runs of numeric terms in place.
   *
   * <p>Two patterns are handled while scanning {@code terms} (the last array slot is never
   * visited):
   * <ul>
   *   <li>a {@code "."} term whose predecessor and successor are both flagged as numbers is
   *       folded into the predecessor, producing {@code <prev>.<next>};</li>
   *   <li>a numeric term absorbs every directly following numeric term, plus one trailing term
   *       whose {@code numEndFreq} is positive.</li>
   * </ul>
   * Absorbed terms are set to {@code null} in the array and the surviving term is re-linked to
   * its new successor through {@code TermUtil.termLink}.
   *
   * @param terms term array to rewrite in place; entries may be {@code null}
   */
  public static void recognition(Term[] terms) {
    final int limit = terms.length - 1;
    for (int i = 0; i < limit; i++) {
      final Term current = terms[i];
      if (current == null) {
        continue;
      }

      // A "." sitting between two numbers: glue all three into the preceding term.
      if (".".equals(current.getName())) {
        Term next = current.getTo();
        Term prev = current.getFrom();
        if (prev.getTermNatures().numAttr.flag && next.getTermNatures().numAttr.flag) {
          prev.setName(prev.getName() + "." + next.getName());
          TermUtil.termLink(prev, next.getTo());
          terms[next.getOffe()] = null;
          terms[i] = null;
          // Step back so the loop increment lands on the merged term again
          // (assumes getOffe() is the term's index in this array -- TODO confirm).
          i = prev.getOffe() - 1;
        }
        continue;
      }

      if (!current.getTermNatures().numAttr.flag) {
        continue;
      }

      // Append every consecutive numeric successor to the current term.
      Term cursor = current.getTo();
      while (cursor.getTermNatures().numAttr.flag) {
        current.setName(current.getName() + cursor.getName());
        cursor = cursor.getTo();
      }

      // A term with a positive numEndFreq right after the run is appended as well.
      if (cursor.getTermNatures().numAttr.numEndFreq > 0) {
        current.setName(current.getName() + cursor.getName());
        cursor = cursor.getTo();
      }

      // A different successor means at least one term was absorbed.
      if (current.getTo() != cursor) {
        TermUtil.termLink(current, cursor);
        final int stop = cursor.getOffe();
        // Clear the slots of the absorbed terms.
        for (int j = i + 1; j < stop; j++) {
          terms[j] = null;
        }
        i = stop - 1;
      }
    }
  }
コード例 #2
0
ファイル: MathUtil.java プロジェクト: yestoday2009/ansj_seg
  /**
   * Scores a candidate new word made of the given consecutive terms.
   *
   * <p>The score starts as the negated sum of the two-word frequencies (from
   * {@code TwoWordLibrary}) of: the pair formed by the first term and its predecessor, the pair
   * formed by the last term and its successor, and every adjacent pair inside the list. If that
   * sum is below {@code -3} the candidate is rejected with a score of {@code 0}. Otherwise the
   * term scores of the first term (using its {@code getB()} value), the last term (using
   * {@code getE()}) and the middle terms (using {@code getM()}) are added; the middle
   * contribution is the sum of the middle term scores divided by {@code all.size() - 1}.
   *
   * <p>For a single-element list the middle contribution is skipped. Previously it was computed
   * as {@code 0.0 / 0}, which made the whole result {@code NaN}.
   *
   * @param all the terms forming the candidate word, in order; must not be empty
   * @return the candidate's score, or {@code 0} when the frequency penalty is below {@code -3}
   * @throws IndexOutOfBoundsException if {@code all} is empty
   */
  public static double leftRightEntropy(List<Term> all) {
    final int length = all.size() - 1;
    final Term first = all.get(0);
    final Term end = all.get(length);

    double score = 0;

    // Left link: predecessor -> first term.
    score -= TwoWordLibrary.getTwoWordFreq(first.getFrom(), first);

    // Right link: last term -> successor.
    score -= TwoWordLibrary.getTwoWordFreq(end, end.getTo());

    // Internal links between neighbouring terms of the candidate.
    for (int i = 0; i < length; i++) {
      score -= TwoWordLibrary.getTwoWordFreq(all.get(i), all.get(i + 1));
    }
    if (score < -3) {
      return 0;
    }

    // First-term score.
    NewWordNatureAttr newWordAttr = first.getTermNatures().newWordAttr;
    score += getTermScore(newWordAttr, newWordAttr.getB());

    // Last-term score.
    newWordAttr = end.getTermNatures().newWordAttr;
    score += getTermScore(newWordAttr, newWordAttr.getE());

    // Middle-term score. Skipped when length == 0 (single term): there are no middle terms and
    // dividing by zero would turn the result into NaN.
    if (length > 0) {
      double middleScore = 0;
      for (int i = 1; i < length; i++) {
        newWordAttr = all.get(i).getTermNatures().newWordAttr;
        middleScore += getTermScore(newWordAttr, newWordAttr.getM());
      }
      score += middleScore / length;
    }
    return score;
  }
コード例 #3
0
  /**
   * Scans {@code terms} and tries to match chains of consecutive terms against {@code branch}.
   *
   * <p>Does nothing when {@code branch} is {@code null}. Otherwise, for every non-null term
   * (the last array slot is never visited) its {@code score} and {@code selfScore} are zeroed and
   * its name is looked up via {@code branch.getBranch}. When the lookup fails or reports status
   * 3, {@code reset()} is called and the scan moves on. Otherwise the term's name is appended to
   * {@code sb} and following terms (via {@code getTo()}) are looked up one by one:
   * <ul>
   *   <li>status 1: the name is appended and the walk continues;</li>
   *   <li>status 2: the name is appended, {@code score}, {@code tempNature} and {@code to} are
   *       recorded, {@code makeNewTerm()} is called, and the walk continues;</li>
   *   <li>status 3: same as status 2, but the walk stops;</li>
   *   <li>any other status: a diagnostic line is printed and the walk continues.</li>
   * </ul>
   * The walk also stops when a lookup returns {@code null}; {@code reset()} is called afterwards.
   *
   * <p>NOTE(review): presumably 1 = prefix only, 2 = complete entry that can be extended,
   * 3 = complete entry with no continuation -- confirm against the branch implementation.
   */
  public void recognition() {
    if (branch == null) {
      return;
    }

    final int limit = terms.length - 1;
    for (int index = 0; index < limit; index++) {
      Term head = terms[index];
      if (head == null) {
        continue;
      }
      from = head.getFrom();
      head.score = 0;
      head.selfScore = 0;

      branch = branch.getBranch(head.getName());
      if (branch == null || branch.getStatus() == 3) {
        reset();
        continue;
      }

      offe = index;
      sb.append(head.getName());
      if (branch.getStatus() == 2) {
        head.selfScore = branch.getParam().getScore();
      }

      // Walk forward through the successors, extending the match for as long as lookups succeed.
      Term cursor = head;
      boolean searching = true;
      while (searching) {
        cursor = cursor.getTo();
        branch = branch.getBranch(cursor.getName());
        if (branch == null) {
          // No entry for the extended chain.
          break;
        }

        int status = branch.getStatus();
        if (status == 1) {
          sb.append(cursor.getName());
        } else if (status == 2 || status == 3) {
          sb.append(cursor.getName());
          score = branch.getParam().getScore();
          tempNature = branch.getParam().getNature();
          to = cursor.getTo();
          makeNewTerm();
          // Status 3 ends the walk; status 2 keeps looking for a longer match.
          searching = status != 3;
        } else {
          System.out.println("怎么能出现0呢?");
        }
      }
      reset();
    }
  }