예제 #1
0
  /**
   * Computes the additional path cost charged to a node because of the length of its surface.
   *
   * <p>No penalty is charged unless the surface is longer than
   * {@code kanjiPenaltyLengthTreshold}. Past that point, a surface accepted by
   * {@code isKanjiOnly} is charged {@code kanjiPenalty} for each unit of length above that
   * threshold. Any other surface is charged {@code otherPenalty} for each unit of length above
   * {@code otherPenaltyLengthThreshold}, but only if it is also longer than that second
   * threshold.
   *
   * @param node the node whose surface length is evaluated
   * @return the penalty to add to the path cost, or 0 when none applies
   */
  private int getPenaltyCost(ViterbiNode node) {
    final String text = node.getSurface();
    final int size = text.length();
    int penalty = 0;

    // Surfaces at or below the kanji threshold are never penalized, whatever they contain.
    if (size <= kanjiPenaltyLengthTreshold) {
      return penalty;
    }

    if (isKanjiOnly(text)) {
      // Kanji-only surface: charge for the length in excess of the kanji threshold.
      penalty += (size - kanjiPenaltyLengthTreshold) * kanjiPenalty;
      return penalty;
    }

    if (size > otherPenaltyLengthThreshold) {
      // Any other surface: charge for the length in excess of the non-kanji threshold.
      penalty += (size - otherPenaltyLengthThreshold) * otherPenalty;
    }
    return penalty;
  }
예제 #2
0
  /**
   * Splits a node into a sequence of single-character nodes of type
   * {@code ViterbiNode.Type.UNKNOWN}.
   *
   * <p>One node is created for every {@code char} (UTF-16 code unit) of the surface, so a
   * surrogate pair yields two nodes. Each new node uses word id 0, the
   * {@code unknownDictionary}, and a start index equal to the original node's start index plus
   * the character's offset within the surface.
   *
   * @param node the node whose surface is split
   * @return the single-character nodes in surface order; empty when the surface is empty
   */
  private LinkedList<ViterbiNode> convertUnknownWordToUnigramNode(ViterbiNode node) {
    final int wordId = 0;
    final String text = node.getSurface();
    final LinkedList<ViterbiNode> result = new LinkedList<>();

    // Walk the surface from its last character to its first and prepend each new node, which
    // leaves the list in surface order.
    int offset = text.length() - 1;
    while (offset >= 0) {
      String character = String.valueOf(text.charAt(offset));
      int start = node.getStartIndex() + offset;

      result.addFirst(
          new ViterbiNode(wordId, character, unknownDictionary, start, ViterbiNode.Type.UNKNOWN));
      offset--;
    }

    return result;
  }