/**
 * Returns the string of words (with timestamp) for this token. This method assumes that the word
 * tokens come before other types of token.
 *
 * @param token the token from which the word path is traced back through its predecessors
 * @param wantFiller true if we want filler words, false otherwise
 * @return the string of words
 */
private String getTimedWordPath(Token token, boolean wantFiller) {
    StringBuilder sb = new StringBuilder();

    // get to the first emitting token
    while (token != null && !token.isEmitting()) {
        token = token.getPredecessor();
    }

    if (token != null) {
        // We walk the chain backwards (latest token first), so the data of the
        // last emitting token serves both as the most recent feature seen and,
        // initially, as the boundary feature for the final word.
        Data lastWordFirstFeature = token.getData();
        Data lastFeature = lastWordFirstFeature;
        token = token.getPredecessor();

        while (token != null) {
            if (token.isWord()) {
                Word word = token.getWord();
                if (wantFiller || !word.isFiller()) {
                    // lastFeature = earliest feature seen so far (word start);
                    // lastWordFirstFeature = boundary feature of the previous
                    // word segment (word end, walking backwards).
                    addWord(sb, word, (FloatData) lastFeature, (FloatData) lastWordFirstFeature);
                }
                // Roll the boundary forward (in traversal order): the start
                // feature just used becomes the end boundary for the next
                // (earlier-in-time) word. Note this happens even for skipped
                // fillers, so boundaries stay consistent.
                lastWordFirstFeature = lastFeature;
            }
            // Track the most recently seen (i.e. earliest-in-time so far)
            // non-null feature; word tokens typically carry no data.
            Data feature = token.getData();
            if (feature != null) {
                lastFeature = feature;
            }
            token = token.getPredecessor();
        }
    }
    return sb.toString();
}
/**
 * Returns the string of words for this token, each with the starting sample number as the
 * timestamp. This method assumes that the word tokens come after the unit and hmm tokens.
 *
 * @param token the token from which the word path is traced back through its predecessors
 * @param wantFiller true if filler words should be included, false otherwise
 * @return the string of words, each with the starting sample number
 */
private String getTimedWordTokenLastPath(Token token, boolean wantFiller) {
    StringBuilder sb = new StringBuilder();
    Word word = null;
    Data lastFeature = null;
    Data lastWordFirstFeature = null;

    // Walk the token chain backwards (from the last token to the first).
    while (token != null) {
        if (token.isWord()) {
            // Emit the previously remembered word now that we have reached the
            // word boundary before it: lastFeature is the earliest feature
            // seen so far (the word's start), lastWordFirstFeature the
            // boundary feature recorded at the previous word token.
            if (word != null && lastFeature != null) {
                if (wantFiller || !word.isFiller()) {
                    addWord(sb, word, (FloatData) lastFeature, (FloatData) lastWordFirstFeature);
                }
                // Fixed: removed the dead store "word = token.getWord();" that
                // was here — it was unconditionally overwritten by the
                // assignment below, so removing it does not change behavior.
                lastWordFirstFeature = lastFeature;
            }
            word = token.getWord();
        }
        Data feature = token.getData();
        if (feature != null) {
            lastFeature = feature;
            if (lastWordFirstFeature == null) {
                lastWordFirstFeature = lastFeature;
            }
        }
        token = token.getPredecessor();
    }
    // NOTE(review): the last word remembered (the first word in time) is never
    // appended here; confirm this is intentional (e.g. a sentence-start token).
    return sb.toString();
}
/** * Purges excess members. Remove all nodes that fall below the relativeBeamWidth * * @return a (possible new) active list */ public ActiveList purge() { int fillerCount = 0; Map<Word, Integer> countMap = new HashMap<Word, Integer>(); Collections.sort(tokenList, Scoreable.COMPARATOR); // remove word duplicates for (Iterator<Token> i = tokenList.iterator(); i.hasNext(); ) { Token token = i.next(); WordSearchState wordState = (WordSearchState) token.getSearchState(); Word word = wordState.getPronunciation().getWord(); // only allow maxFiller words if (maxFiller > 0) { if (word.isFiller()) { if (fillerCount < maxFiller) { fillerCount++; } else { i.remove(); continue; } } } if (maxPathsPerWord > 0) { Integer count = countMap.get(word); int c = count == null ? 0 : count; // Since the tokens are sorted by score we only // keep the n tokens for a particular word if (c < maxPathsPerWord - 1) { countMap.put(word, c + 1); } else { i.remove(); } } } if (tokenList.size() > absoluteBeamWidth) { tokenList = tokenList.subList(0, absoluteBeamWidth); } return this; }