コード例 #1
0
 /**
  * Creates a term from the text currently accumulated in {@code sb} and registers it.
  *
  * <p>The new term starts at {@code offe}, carries {@code tempNature} as its nature and
  * {@code score} as its self score. It is then linked after {@code from} and before {@code to},
  * inserted into {@code terms}, and passed to {@code TermUtil.parseNature}.
  */
 private void makeNewTerm() {
   final Term merged = new Term(sb.toString(), offe, tempNature.natureStr, 1);
   merged.selfScore = score;
   merged.setNature(tempNature);

   // Only texts longer than three characters get sub-terms attached.
   final boolean needsSubTerms = sb.length() > 3;
   if (needsSubTerms) {
     merged.setSubTerm(TermUtil.getSubTerm(from, to));
   }

   // Splice the new term between its neighbours: from -> merged -> to.
   TermUtil.termLink(from, merged);
   TermUtil.termLink(merged, to);

   TermUtil.insertTerm(terms, merged);
   TermUtil.parseNature(merged);
 }
コード例 #2
0
 /**
  * Scans {@code terms} and collects person-name candidates produced by {@link #nameFind}.
  *
  * <p>Every non-null term whose person attribute has {@code flag} set is tried as the start of a
  * name, with the {@code size} argument of {@code nameFind} running from 2 down to 0. A size is
  * only attempted when the start term's frequency for that size at position 0 exceeds 10.
  *
  * <p>When {@code nameFind} leaves {@code skip} set, the scores of all terms covered by the
  * candidate are zeroed and scanning resumes at the candidate's end position.
  *
  * @return the candidate terms in the order they were found; never {@code null}
  */
 private List<Term> recogntion_() {
   List<Term> termList = new ArrayList<Term>();
   int beginFreq = 10;
   for (int i = 0; i < terms.length; i++) {
     Term term = terms[i];
     if (term == null || !term.getTermNatures().personAttr.flag) {
       continue;
     }
     term.score = 0;
     term.selfScore = 0;
     for (int j = 2; j > -1; j--) {
       int freq = term.getTermNatures().personAttr.getFreq(j, 0);
       // NOTE(review): this used to read
       //   (freq > 10) || (term.getName().length() == 2 && freq > 10)
       // whose second clause can never be true when the first is false. It is reduced to the
       // equivalent single test; confirm whether two-character terms were meant to use a
       // different threshold.
       if (freq <= 10) {
         continue;
       }
       Term tempTerm = nameFind(i, beginFreq, j);
       if (tempTerm == null) {
         continue;
       }
       termList.add(tempTerm);
       // If the recognition is unambiguous, clear the scores of the covered terms and jump
       // past the recognized name.
       if (skip) {
         for (int j2 = i; j2 < tempTerm.getToValue(); j2++) {
           if (terms[j2] != null) {
             terms[j2].score = 0;
             terms[j2].selfScore = 0;
           }
         }
         // The outer loop's i++ will move on to getToValue().
         i = tempTerm.getToValue() - 1;
         break;
       }
     }
     // The next candidate is scored against the "begin" count of the term just examined.
     beginFreq = term.getTermNatures().personAttr.begin + 1;
   }
   return termList;
 }
コード例 #3
0
  /**
   * Walks {@code terms} and, starting from each non-null term, follows {@code branch} along the
   * chain of {@code getTo()} successors, accumulating the term names in {@code sb}.
   *
   * <p>Branch status values as used here:
   * <ul>
   *   <li>1 - the name is appended and the walk continues;</li>
   *   <li>2 - the name is appended, a new term is created via {@link #makeNewTerm()}, and the
   *       walk continues;</li>
   *   <li>3 - the name is appended, a new term is created, and the walk stops.</li>
   * </ul>
   *
   * <p>Does nothing when {@code branch} is {@code null} on entry. {@code reset()} is called after
   * every start position that was examined.
   */
  public void recognition() {
    if (branch == null) {
      return;
    }

    // The last slot of the array is never used as a start position.
    final int lastStart = terms.length - 1;

    for (int i = 0; i < lastStart; i++) {
      final Term head = terms[i];
      if (head == null) {
        continue;
      }

      from = head.getFrom();
      head.score = 0;
      head.selfScore = 0;

      branch = branch.getBranch(head.getName());

      // No match for the first term, or a match that is already terminal (status 3): nothing to
      // merge from this position.
      if (branch == null || branch.getStatus() == 3) {
        reset();
        continue;
      }

      offe = i;
      sb.append(head.getName());
      if (branch.getStatus() == 2) {
        head.selfScore = branch.getParam().getScore();
      }

      // Keep following successors for as long as the branch keeps matching.
      // NOTE(review): assumes getTo() never returns null while a branch is still open - confirm.
      Term cursor = head;
      while (true) {
        cursor = cursor.getTo();
        branch = branch.getBranch(cursor.getName());
        // Leave the walk as soon as nothing matches.
        if (branch == null) {
          break;
        }

        final int status = branch.getStatus();
        if (status == 1) {
          sb.append(cursor.getName());
        } else if (status == 2 || status == 3) {
          sb.append(cursor.getName());
          score = branch.getParam().getScore();
          tempNature = branch.getParam().getNature();
          to = cursor.getTo();
          makeNewTerm();
          if (status == 3) {
            break;
          }
        } else {
          System.out.println("怎么能出现0呢?");
        }
      }
      reset();
    }
  }
コード例 #4
0
  /**
   * Person-name recognition: tries to build a name candidate starting at {@code terms[offe]}.
   *
   * <p>Up to {@code size + 2} non-null terms are consumed. The attempt is abandoned as soon as one
   * of them has a zero frequency for the given {@code size} at its position. The candidate's score
   * is then assembled from logarithms of the frequencies involved, and the candidate is rejected
   * unless that score is at most -3.
   *
   * <p>Side effect: {@code skip} is cleared on entry and, when a candidate is returned, set to
   * {@code true} only if none of the consumed terms had a positive person-attribute
   * {@code allFreq}.
   *
   * @param offe index in {@code terms} at which the candidate starts
   * @param beginFreq count whose logarithm is subtracted from the score
   * @param size selects the {@code FACTORY} entry and the frequency table row; the candidate spans
   *     at most {@code size + 2} terms
   * @return a new term with nature {@code TermNatures.NR} and the computed self score, or
   *     {@code null} when the candidate is rejected
   */
  private Term nameFind(int offe, int beginFreq, int size) {
    skip = false;

    final StringBuilder nameText = new StringBuilder();
    int ambiguousCount = 0;
    int position = 0;
    double logSum = 0;
    Term lastTerm = null;

    int i = offe;
    while (i < terms.length) {
      // Empty slots are stepped over without consuming a name position.
      if (terms[i] != null) {
        lastTerm = terms[i];
        final PersonNatureAttr attr = lastTerm.getTermNatures().personAttr;

        // Frequency of this term at this position for this name length; zero means the
        // combination is impossible, so give up.
        final int positionFreq = attr.getFreq(size, position);
        if (positionFreq == 0) {
          return null;
        }

        if (attr.allFreq > 0) {
          ambiguousCount++;
        }
        nameText.append(lastTerm.getName());
        logSum += Math.log(lastTerm.getTermNatures().allFreq + 1);
        logSum -= Math.log(positionFreq);
        position++;

        if (position == size + 2) {
          break;
        }
      }
      i++;
    }

    double nameScore = -Math.log(FACTORY[size]) + logSum;

    // Look for the first non-null term after the name; running off the end of the array counts
    // as an end frequency of 10.
    int next = i + 1;
    while (next < terms.length && terms[next] == null) {
      next++;
    }
    double endFreq = 10;
    if (next < terms.length) {
      final Term following = terms[next];
      if (NgramLibrary.getTwoWordFreq(lastTerm, following) > 3) {
        return null;
      }
      endFreq = following.getTermNatures().personAttr.end + 1;
    }

    nameScore -= Math.log(endFreq);
    nameScore -= Math.log(beginFreq);

    if (nameScore > -3) {
      return null;
    }
    if (logSum > 0 && ambiguousCount > 0) {
      return null;
    }

    skip = (ambiguousCount == 0);

    final Term nameTerm = new Term(nameText.toString(), offe, TermNatures.NR);
    nameTerm.selfScore = nameScore;
    return nameTerm;
  }