Пример #1
0
  /**
   * Builds the timed word string for the path that ends at the given token by walking the
   * predecessor chain back from it. This method assumes that the word tokens come after the unit
   * and hmm tokens.
   *
   * <p>A word is held back until the next word token on the walk is reached, and is written (via
   * {@code addWord}) only if some feature data has been seen by then. Consequently the word whose
   * token is visited last on the walk is never written by this method.
   *
   * @param token the token at the end of the path; a null token yields an empty string
   * @param wantFiller true if filler words should be written, false to leave them out
   * @return the string of words, each with the times attached by {@code addWord}
   */
  private String getTimedWordTokenLastPath(Token token, boolean wantFiller) {
    StringBuilder result = new StringBuilder();
    Word pendingWord = null;
    Data latestFeature = null;
    Data pendingBoundary = null;

    for (Token current = token; current != null; current = current.getPredecessor()) {
      if (current.isWord()) {
        boolean canFlush = pendingWord != null && latestFeature != null;
        if (canFlush) {
          boolean include = wantFiller || !pendingWord.isFiller();
          if (include) {
            addWord(result, pendingWord, (FloatData) latestFeature, (FloatData) pendingBoundary);
          }
          // The feature that closes the flushed word becomes the boundary of the next one.
          pendingBoundary = latestFeature;
        }
        pendingWord = current.getWord();
      }

      Data feature = current.getData();
      if (feature != null) {
        latestFeature = feature;
        // The very first feature seen on the walk seeds the boundary.
        if (pendingBoundary == null) {
          pendingBoundary = feature;
        }
      }
    }

    return result.toString();
  }
Пример #2
0
  /**
   * Returns the string of words (with timestamp) for the path ending at the given token. This
   * method assumes that the word tokens come before other types of token.
   *
   * <p>The walk first skips back to the nearest emitting token, whose data seeds both time
   * markers, and then follows the predecessor chain, handing every word token to {@code addWord}.
   * If no emitting token is found the result is empty.
   *
   * @param token the token at the end of the path; may be null
   * @param wantFiller true if we want filler words, false otherwise
   * @return the string of words
   */
  private String getTimedWordPath(Token token, boolean wantFiller) {
    StringBuilder result = new StringBuilder();

    // Skip back to the first emitting token.
    Token current = token;
    while (current != null && !current.isEmitting()) {
      current = current.getPredecessor();
    }
    if (current == null) {
      return result.toString();
    }

    Data wordBoundary = current.getData();
    Data latestFeature = wordBoundary;

    for (current = current.getPredecessor(); current != null; current = current.getPredecessor()) {
      if (current.isWord()) {
        Word word = current.getWord();
        boolean include = wantFiller || !word.isFiller();
        if (include) {
          addWord(result, word, (FloatData) latestFeature, (FloatData) wordBoundary);
        }
        // Whether or not the word was written, it moves the boundary for the next word.
        wordBoundary = latestFeature;
      }

      Data feature = current.getData();
      if (feature != null) {
        latestFeature = feature;
      }
    }

    return result.toString();
  }
Пример #3
0
  /**
   * Gets the ngram probability of the word sequence from the parent model and then multiplies it
   * by the stored factor of every word in the sequence whose string form is a key of
   * {@code keywordProbs}. When {@code keywordProbs} is null the parent's value is returned as is.
   *
   * <p>NOTE(review): the result is documented as a log-domain value, yet the keyword factors are
   * applied by multiplication - confirm that the entries of {@code keywordProbs} are meant to
   * scale a log probability.
   *
   * @param wordSequence the word sequence
   * @return the probability of the word sequence. Probability is in logMath log base
   */
  @Override
  public float getProbability(WordSequence wordSequence) {
    float score = parent.getProbability(wordSequence);
    if (keywordProbs == null) {
      return score;
    }

    for (Word word : wordSequence.getWords()) {
      String key = word.toString();
      if (!keywordProbs.containsKey(key)) {
        continue;
      }
      score *= keywordProbs.get(key);
    }

    return score;
  }
    /**
     * Purges excess members from this list. The tokens are sorted with
     * {@code Scoreable.COMPARATOR} and then visited in that order. A token is removed when
     *
     * <ul>
     *   <li>{@code maxFiller > 0}, its word is a filler and {@code maxFiller} fillers have already
     *       been kept, or
     *   <li>{@code maxPathsPerWord > 0} and {@code maxPathsPerWord} tokens for the same word have
     *       already been kept.
     * </ul>
     *
     * Finally the list is cut down to its first {@code absoluteBeamWidth} tokens if it is longer
     * than that.
     *
     * <p>Every token in the list must carry a {@code WordSearchState}; any other search state
     * fails the cast.
     *
     * @return this active list, purged in place
     */
    public ActiveList purge() {
      int fillerCount = 0;
      Map<Word, Integer> countMap = new HashMap<Word, Integer>();
      Collections.sort(tokenList, Scoreable.COMPARATOR);

      // remove word duplicates
      for (Iterator<Token> i = tokenList.iterator(); i.hasNext(); ) {
        Token token = i.next();
        WordSearchState wordState = (WordSearchState) token.getSearchState();
        Word word = wordState.getPronunciation().getWord();

        // only allow maxFiller filler words
        if (maxFiller > 0 && word.isFiller()) {
          if (fillerCount >= maxFiller) {
            i.remove();
            continue;
          }
          fillerCount++;
        }

        if (maxPathsPerWord > 0) {
          Integer count = countMap.get(word);
          int c = count == null ? 0 : count;

          // The tokens are sorted, so the first maxPathsPerWord tokens seen for a word are the
          // ones to keep. The bound is maxPathsPerWord itself: comparing against
          // maxPathsPerWord - 1 kept one token too few and removed every token when the limit
          // was 1.
          if (c < maxPathsPerWord) {
            countMap.put(word, c + 1);
          } else {
            i.remove();
          }
        }
      }

      if (tokenList.size() > absoluteBeamWidth) {
        tokenList = tokenList.subList(0, absoluteBeamWidth);
      }

      return this;
    }
Пример #5
0
 /**
  * Prepends the given word, followed by its start and end times in parentheses, to the given
  * string builder. Each time is the feature's first sample number divided by its sample rate; a
  * null feature gives a time of -1. When the builder already has content, a single space
  * separates the new entry from it.
  *
  * @param sb the StringBuilder into which the word is added
  * @param word the word to add
  * @param startFeature the starting feature, or null if unknown
  * @param endFeature the ending feature, or null if unknown
  */
 private void addWord(StringBuilder sb, Word word, FloatData startFeature, FloatData endFeature) {
   float startTime = -1;
   if (startFeature != null) {
     startTime = (float) startFeature.getFirstSampleNumber() / startFeature.getSampleRate();
   }

   float endTime = -1;
   if (endFeature != null) {
     endTime = (float) endFeature.getFirstSampleNumber() / endFeature.getSampleRate();
   }

   // Entries go in at the front, so a word added later ends up earlier in the string.
   boolean hasEarlierEntries = sb.length() > 0;
   if (hasEarlierEntries) {
     sb.insert(0, ' ');
   }
   String entry = word.getSpelling() + '(' + startTime + ',' + endTime + ')';
   sb.insert(0, entry);
 }
Пример #6
0
 /**
  * Internal routine used when dumping Lattices as .LAT files. Writes a single line made of the
  * literal {@code node:}, this node's id, the word spelling, the begin time and the end time,
  * separated by single spaces. Scores such as forward/backward probability or posterior are not
  * part of the line.
  *
  * @param f print writer to store
  * @throws IOException if error occurred
  */
 void dump(PrintWriter f) throws IOException {
   String line =
       "node: " + id + ' ' + word.getSpelling() + ' ' + getBeginTime() + ' ' + getEndTime();
   f.println(line);
 }
Пример #7
0
 /**
  * Returns true if the given node is equivalent to this node. Two nodes are equivalent only if
  * their words have equal spellings, they have the same number of entering edges and of leaving
  * edges, and their begin and end times are the same. The checks run in that order and stop at
  * the first mismatch.
  *
  * @param other the Node we're comparing to; must not be null
  * @return true if the Node is equivalent; false otherwise
  */
 public boolean isEquivalent(Node other) {
   if (!word.getSpelling().equals(other.getWord().getSpelling())) {
     return false;
   }
   if (getEnteringEdges().size() != other.getEnteringEdges().size()) {
     return false;
   }
   if (getLeavingEdges().size() != other.getLeavingEdges().size()) {
     return false;
   }
   if (getBeginTime() != other.getBeginTime()) {
     return false;
   }
   // This node's end time is read from the field directly, the other node's through its getter.
   return endTime == other.getEndTime();
 }
Пример #8
0
 /**
  * Returns a description of this Node in the form {@code Node(spelling,beginTime|endTime)}.
  *
  * @return a description of this Node
  */
 @Override
 public String toString() {
   StringBuilder description = new StringBuilder("Node(");
   description.append(word.getSpelling());
   description.append(',');
   description.append(getBeginTime());
   description.append('|');
   description.append(getEndTime());
   description.append(')');
   return description.toString();
 }