예제 #1
0
  /**
   * Person-name disambiguation, applied in place to a segmented term array.
   *
   * <p>Pass 1: a two-character term whose {@code termNatures()} is {@code TermNatures.NR} absorbs
   * the first character of the term two slots later when that term's {@code personAttr.split} is
   * positive; the remainder of that term becomes a new {@code TermNatures.NW} term one slot
   * further on. Example: 邓颖超生前 segmented as 邓颖 / 超生 / 前 is corrected to 邓颖超 / 生 / 前.
   *
   * <p>Pass 2 (foreign names): a one-character term that {@code WordAlert.CharCover} maps to
   * '·' is merged with its predecessor and successor into a single name when both neighbours
   * have a nature string starting with "nr". The original author's note says the rule is meant
   * for names joined by '-', '·' or '•'.
   *
   * <p>Slots that would fall outside the array, and missing neighbour terms, are skipped rather
   * than raising an exception.
   *
   * @param terms the term array to fix up; may contain null slots, and is modified in place
   *     (presumably indexed by character offset — confirm against the caller)
   */
  public static void nameAmbiguity(Term[] terms) {
    for (int i = 0; i < terms.length - 1; i++) {
      Term term = terms[i];
      if (term == null || term.termNatures() != TermNatures.NR || term.getName().length() != 2) {
        continue;
      }
      // The fix reads slot i + 2 and rewrites slot i + 3, so both must exist.
      if (i + 3 >= terms.length) {
        continue;
      }
      Term next = terms[i + 2];
      if (next == null || next.termNatures().personAttr.split <= 0) {
        continue;
      }

      String nextName = next.getName();
      // Move the first character of the following term onto the person name.
      term.setName(term.getName() + nextName.charAt(0));
      // NOTE(review): the remainder keeps next's offset although it is stored one slot later;
      // confirm whether the offset should be next.getOffe() + 1.
      Term remainder = new Term(nextName.substring(1), next.getOffe(), TermNatures.NW);
      terms[i + 2] = null;
      terms[i + 3] = remainder;
      TermUtil.termLink(term, remainder);
      TermUtil.termLink(remainder, next.to());
    }

    // Foreign-name fix: join "<name>·<name>" into one term. Index 0 has no predecessor.
    for (int i = 1; i < terms.length; i++) {
      Term term = terms[i];
      if (term == null
          || term.getName().length() != 1
          || WordAlert.CharCover(term.getName().charAt(0)) != '·') {
        continue;
      }
      Term from = term.from();
      Term next = term.to();
      if (from == null || next == null) {
        // A separator without both neighbours cannot be joining two names.
        continue;
      }

      if (from.natrue().natureStr.startsWith("nr") && next.natrue().natureStr.startsWith("nr")) {
        from.setName(from.getName() + term.getName() + next.getName());
        TermUtil.termLink(from, next.to());
        terms[i] = null;
        if (i + 1 < terms.length) {
          terms[i + 1] = null;
        }
      }
    }
  }
예제 #2
0
  /**
   * Computes the accumulated score of stepping from one term to the term that follows it.
   *
   * <p>The step cost is the negative natural log of a mix, weighted by {@code dSmoothingPara},
   * of the first term's frequency and the pair frequency returned by
   * {@code NgramLibrary.getTwoWordFreq}. If that cost is negative, the first term's frequency is
   * added to it. The result is the cost plus the score already stored on {@code from}.
   *
   * <p>If the first term's frequency plus one is negative, the method instead adds
   * {@code MAX_FREQUENCE} to the score of {@code from}, stores that value back on {@code from},
   * and returns it.
   *
   * @param from the preceding term
   * @param to the following term
   * @return the score of {@code from} plus the step cost, or the penalised score described above
   */
  public static double compuScore(Term from, Term to) {
    double fromFreq = from.termNatures().allFreq + 1;

    if (fromFreq < 0) {
      double penalised = from.score() + MAX_FREQUENCE;
      from.score(penalised);
      return penalised;
    }

    int pairFreq = NgramLibrary.getTwoWordFreq(from, to);

    // Same arithmetic as a single expression, split into its two weighted parts.
    double singleTermPart = dSmoothingPara * fromFreq / (MAX_FREQUENCE + 80000);
    double pairPart = (1 - dSmoothingPara) * ((1 - dTemp) * pairFreq / fromFreq + dTemp);
    double cost = -Math.log(singleTermPart + pairPart);

    double adjustedCost = cost < 0 ? cost + fromFreq : cost;
    return from.score() + adjustedCost;
  }
예제 #3
0
 /**
  * Scores a pair of terms by frequency: the sum of the {@code allFreq} values of the two terms'
  * {@code termNatures()}.
  *
  * <p>The original note describes a score built from nature, frequency and word length; the
  * code uses the two frequencies only.
  *
  * @param from the first term
  * @param term the second term
  * @return the sum of both terms' {@code allFreq}, as a double
  */
 public static double compuScoreFreq(Term from, Term term) {
   double freqSum = from.termNatures().allFreq + term.termNatures().allFreq;
   return freqSum;
 }