private int getPenaltyCost(ViterbiNode node) { int pathCost = 0; String surface = node.getSurface(); int length = surface.length(); if (length > kanjiPenaltyLengthTreshold) { if (isKanjiOnly(surface)) { // Process only Kanji keywords pathCost += (length - kanjiPenaltyLengthTreshold) * kanjiPenalty; } else if (length > otherPenaltyLengthThreshold) { pathCost += (length - otherPenaltyLengthThreshold) * otherPenalty; } } return pathCost; }
/**
 * Splits a node into a list of single-character nodes, one per Unicode code point of
 * its surface, in surface order.
 *
 * <p>Each produced node uses word id {@code 0}, the {@code unknownDictionary}, type
 * {@link ViterbiNode.Type#UNKNOWN}, and a start index equal to the source node's start
 * index plus the character's UTF-16 offset within the surface.
 *
 * <p>A supplementary character (encoded as a surrogate pair) is kept together in one
 * node rather than being split into two nodes that each hold an unpaired surrogate.
 * An unpaired surrogate already present in the surface still yields its own node.
 *
 * @param node the node whose surface is to be broken into unigrams
 * @return the unigram nodes in surface order; empty when the surface is empty
 */
private LinkedList<ViterbiNode> convertUnknownWordToUnigramNode(ViterbiNode node) {
    LinkedList<ViterbiNode> uniGramNodes = new LinkedList<>();
    int unigramWordId = 0;
    String surface = node.getSurface();
    int nodeStartIndex = node.getStartIndex();

    // Walk from the end of the surface towards the start, prepending each node so the
    // resulting list is in surface order.
    int end = surface.length();
    while (end > 0) {
        // codePointBefore returns the full code point when the two chars ending at
        // 'end' form a surrogate pair, so charCount is 2 there and 1 otherwise.
        int begin = end - Character.charCount(surface.codePointBefore(end));
        String word = surface.substring(begin, end);

        ViterbiNode uniGramNode = new ViterbiNode(
            unigramWordId, word, unknownDictionary, nodeStartIndex + begin, ViterbiNode.Type.UNKNOWN);
        uniGramNodes.addFirst(uniGramNode);

        end = begin;
    }

    return uniGramNodes;
}