/**
 * Returns the string of words for this token, each with the starting sample number as the
 * timestamp. This method assumes that the word tokens come after the unit and HMM tokens.
 *
 * @param token the token whose path is traced back
 * @param wantFiller true if filler words should be included, false otherwise
 * @return the string of words, each with the starting sample number
 */
private String getTimedWordTokenLastPath(Token token, boolean wantFiller) {
    StringBuilder sb = new StringBuilder();
    Word word = null;
    Data lastFeature = null;
    Data lastWordFirstFeature = null;

    // walk the predecessor chain from the last token backwards
    while (token != null) {
        if (token.isWord()) {
            // emit the previously seen word once its time span is known
            if (word != null && lastFeature != null) {
                if (wantFiller || !word.isFiller()) {
                    addWord(sb, word, (FloatData) lastFeature,
                            (FloatData) lastWordFirstFeature);
                }
                lastWordFirstFeature = lastFeature;
            }
            word = token.getWord();
        }
        Data feature = token.getData();
        if (feature != null) {
            lastFeature = feature;
            if (lastWordFirstFeature == null) {
                lastWordFirstFeature = lastFeature;
            }
        }
        token = token.getPredecessor();
    }
    return sb.toString();
}
/**
 * Returns the string of words (with timestamps) for this token. This method assumes that the
 * word tokens come before the other types of tokens.
 *
 * @param token the token whose path is traced back
 * @param wantFiller true if filler words should be included, false otherwise
 * @return the string of words
 */
private String getTimedWordPath(Token token, boolean wantFiller) {
    StringBuilder sb = new StringBuilder();

    // get to the first emitting token
    while (token != null && !token.isEmitting()) {
        token = token.getPredecessor();
    }

    if (token != null) {
        Data lastWordFirstFeature = token.getData();
        Data lastFeature = lastWordFirstFeature;
        token = token.getPredecessor();

        // walk the predecessor chain from the last token backwards
        while (token != null) {
            if (token.isWord()) {
                Word word = token.getWord();
                if (wantFiller || !word.isFiller()) {
                    addWord(sb, word, (FloatData) lastFeature,
                            (FloatData) lastWordFirstFeature);
                }
                lastWordFirstFeature = lastFeature;
            }
            Data feature = token.getData();
            if (feature != null) {
                lastFeature = feature;
            }
            token = token.getPredecessor();
        }
    }
    return sb.toString();
}
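
// For illustration, a minimal, self-contained sketch of the backward traversal pattern the two
// methods above rely on. The Entry class, the chain contents, and the layout are hypothetical
// stand-ins for Token, Word and FloatData; only the predecessor walk and the word prepending
// are shown.
class TokenChainExample {

    static final class Entry {
        final String word;          // non-null only for a word entry
        final Entry predecessor;

        Entry(String word, Entry predecessor) {
            this.word = word;
            this.predecessor = predecessor;
        }
    }

    public static void main(String[] args) {
        // build the chain front-to-back: "one", two data entries, "two", two data entries
        Entry chain = null;
        chain = new Entry("one", chain);
        chain = new Entry(null, chain);
        chain = new Entry(null, chain);
        chain = new Entry("two", chain);
        chain = new Entry(null, chain);
        chain = new Entry(null, chain);     // the decoder hands over the last entry

        // walk backwards over predecessors, prepending each word as it is met
        StringBuilder sb = new StringBuilder();
        for (Entry e = chain; e != null; e = e.predecessor) {
            if (e.word != null) {
                if (sb.length() > 0) {
                    sb.insert(0, ' ');
                }
                sb.insert(0, e.word);
            }
        }
        System.out.println(sb);             // prints: one two
    }
}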
/**
 * Gets the n-gram probability of the word sequence represented by the word list.
 *
 * @param wordSequence the word sequence
 * @return the probability of the word sequence, in logMath log base
 */
@Override
public float getProbability(WordSequence wordSequence) {
    float prob = parent.getProbability(wordSequence);
    if (keywordProbs == null) {
        return prob;
    }
    // scale the log-domain score for each configured keyword found in the sequence
    for (Word word : wordSequence.getWords()) {
        String ws = word.toString();
        if (keywordProbs.containsKey(ws)) {
            prob *= keywordProbs.get(ws);
        }
    }
    return prob;
}
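
// For illustration, a minimal, self-contained sketch of how the per-keyword factors above act on
// a log-domain score. The map contents and the scores are hypothetical; in the decoder the parent
// model supplies the log-base score and keywordProbs comes from configuration.
import java.util.HashMap;
import java.util.Map;

class KeywordBoostExample {

    public static void main(String[] args) {
        Map<String, Float> keywordProbs = new HashMap<>();
        keywordProbs.put("forward", 0.5f);  // a factor below 1 shrinks a negative log score

        float score = -120.0f;              // hypothetical log-base score from the parent model
        for (String word : new String[] {"go", "forward", "now"}) {
            Float factor = keywordProbs.get(word);
            if (factor != null) {
                score *= factor;            // same scaling as getProbability() above
            }
        }
        System.out.println(score);          // prints -60.0: closer to zero, hence more probable
    }
}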
/**
 * Purges excess members. Limits the number of filler words and the number of paths kept per
 * word, and trims the token list to the absolute beam width.
 *
 * @return a (possibly new) active list
 */
public ActiveList purge() {
    int fillerCount = 0;
    Map<Word, Integer> countMap = new HashMap<Word, Integer>();
    Collections.sort(tokenList, Scoreable.COMPARATOR);

    // remove word duplicates
    for (Iterator<Token> i = tokenList.iterator(); i.hasNext();) {
        Token token = i.next();
        WordSearchState wordState = (WordSearchState) token.getSearchState();
        Word word = wordState.getPronunciation().getWord();

        // only allow maxFiller filler words
        if (maxFiller > 0) {
            if (word.isFiller()) {
                if (fillerCount < maxFiller) {
                    fillerCount++;
                } else {
                    i.remove();
                    continue;
                }
            }
        }

        if (maxPathsPerWord > 0) {
            Integer count = countMap.get(word);
            int c = count == null ? 0 : count;

            // since the tokens are sorted by score, only the best-scoring
            // tokens for a particular word are kept
            if (c < maxPathsPerWord - 1) {
                countMap.put(word, c + 1);
            } else {
                i.remove();
            }
        }
    }

    if (tokenList.size() > absoluteBeamWidth) {
        tokenList = tokenList.subList(0, absoluteBeamWidth);
    }

    return this;
}
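
// For illustration, a minimal, self-contained sketch of the per-word path limit applied in
// purge(), using plain strings in place of Tokens. The word list, which stands in for an
// already score-sorted tokenList, and the limit of 3 are hypothetical; purge() additionally
// caps filler words and the absolute beam width.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

class PerWordPruneExample {

    public static void main(String[] args) {
        int maxPathsPerWord = 3;
        // entries are already sorted best-first, as tokenList is after Collections.sort()
        List<String> sortedWords =
                new ArrayList<>(Arrays.asList("hello", "hello", "hello", "hello", "world"));
        Map<String, Integer> countMap = new HashMap<>();

        for (Iterator<String> i = sortedWords.iterator(); i.hasNext();) {
            String word = i.next();
            int c = countMap.getOrDefault(word, 0);
            if (c < maxPathsPerWord - 1) {
                countMap.put(word, c + 1);  // keep this path and remember how many were kept
            } else {
                i.remove();                 // enough paths for this word were already kept
            }
        }
        System.out.println(sortedWords);    // prints: [hello, hello, world]
    }
}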
/**
 * Adds the given word into the given string builder with the start and end times from the
 * given features.
 *
 * @param sb the StringBuilder into which the word is added
 * @param word the word to add
 * @param startFeature the starting feature
 * @param endFeature the ending feature
 */
private void addWord(StringBuilder sb, Word word, FloatData startFeature, FloatData endFeature) {
    float startTime = startFeature == null ? -1
            : ((float) startFeature.getFirstSampleNumber() / startFeature.getSampleRate());
    float endTime = endFeature == null ? -1
            : ((float) endFeature.getFirstSampleNumber() / endFeature.getSampleRate());
    if (sb.length() > 0) {
        sb.insert(0, ' ');
    }
    // prepend, since the token path is traversed from the last word backwards
    sb.insert(0, (word.getSpelling() + '(' + startTime + ',' + endTime + ')'));
}
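
// For illustration, a minimal sketch of the timestamp arithmetic in addWord(), assuming a
// hypothetical 16 kHz feature stream; the sample numbers, the rate, and the word are
// illustrative, not taken from a decoder run.
class WordTimeExample {

    public static void main(String[] args) {
        long startSample = 8000;    // first sample number of the start feature
        long endSample = 24000;     // first sample number of the end feature
        int sampleRate = 16000;     // samples per second

        float startTime = (float) startSample / sampleRate;    // 0.5 s
        float endTime = (float) endSample / sampleRate;        // 1.5 s

        // same "<spelling>(<start>,<end>)" layout that addWord() prepends to the builder
        System.out.println("one" + '(' + startTime + ',' + endTime + ')');
        // prints: one(0.5,1.5)
    }
}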
/**
 * Internal routine used when dumping Lattices as .LAT files.
 *
 * @param f the print writer to store the node in
 * @throws IOException if an error occurs
 */
void dump(PrintWriter f) throws IOException {
    f.println("node: " + id + ' ' + word.getSpelling() +
            // " a:" + getForwardProb() + " b:" + getBackwardProb()
            // " p:" + getPosterior());
            ' ' + getBeginTime() + ' ' + getEndTime());
}
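
// For illustration, a minimal sketch of the node line that dump() writes into a .LAT file. The
// id, word, and times are hypothetical; the real values come from the Node being dumped, and
// the commented-out fields above would add forward/backward/posterior scores to the same line.
import java.io.PrintWriter;

class LatNodeLineExample {

    public static void main(String[] args) {
        PrintWriter f = new PrintWriter(System.out, true);
        String id = "12";
        String spelling = "hello";
        long beginTime = 500;
        long endTime = 830;

        // same "node: <id> <word> <begin> <end>" layout as dump() above
        f.println("node: " + id + ' ' + spelling + ' ' + beginTime + ' ' + endTime);
        // prints: node: 12 hello 500 830
    }
}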
/**
 * Returns true if the given node is equivalent to this node. Two nodes are equivalent only if
 * they have the same word, the same number of entering and leaving edges, and the same begin
 * and end times.
 *
 * @param other the Node we're comparing to
 * @return true if the Node is equivalent; false otherwise
 */
public boolean isEquivalent(Node other) {
    return ((word.getSpelling().equals(other.getWord().getSpelling())
            && (getEnteringEdges().size() == other.getEnteringEdges().size()
                && getLeavingEdges().size() == other.getLeavingEdges().size()))
            && (getBeginTime() == other.getBeginTime()
                && endTime == other.getEndTime()));
}
/**
 * Returns a description of this Node that contains the word, the start time, and the end time.
 *
 * @return a description of this Node
 */
@Override
public String toString() {
    return ("Node(" + word.getSpelling() + ',' + getBeginTime() + '|' + getEndTime() + ')');
}