Example #1
  /**
   * Returns the string of words (with timestamps) for this token. This method assumes that the
   * word tokens come before the other token types (unit and HMM tokens).
   *
   * @param token the token from which to trace back the word path
   * @param wantFiller true if filler words should be included, false otherwise
   * @return the string of words
   */
  private String getTimedWordPath(Token token, boolean wantFiller) {
    StringBuilder sb = new StringBuilder();

    // get to the first emitting token
    while (token != null && !token.isEmitting()) {
      token = token.getPredecessor();
    }

    if (token != null) {
      Data lastWordFirstFeature = token.getData();
      Data lastFeature = lastWordFirstFeature;
      token = token.getPredecessor();

      while (token != null) {
        if (token.isWord()) {
          Word word = token.getWord();
          if (wantFiller || !word.isFiller()) {
            addWord(sb, word, (FloatData) lastFeature, (FloatData) lastWordFirstFeature);
          }
          lastWordFirstFeature = lastFeature;
        }
        Data feature = token.getData();
        if (feature != null) {
          lastFeature = feature;
        }
        token = token.getPredecessor();
      }
    }
    return sb.toString();
  }
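
Both timed-path methods delegate to a private addWord helper that is not shown in this example. The sketch below is an assumption about what it might look like, not the actual implementation: it assumes FloatData exposes the starting sample number of each frame and that the two feature arguments mark the start and end of the word; words are discovered in reverse order while walking predecessor links, so each one is prepended to the builder.

  // Hedged sketch of the addWord helper used above; the body is assumed, not taken
  // from the original source.
  private void addWord(StringBuilder sb, Word word, FloatData startFeature, FloatData endFeature) {
    if (sb.length() > 0) {
      sb.insert(0, ' ');
    }
    // Words arrive in reverse (end-of-utterance first), so each one is inserted at the
    // front of the builder together with its starting and ending sample numbers.
    sb.insert(0, word.getSpelling() + '('
        + startFeature.getFirstSampleNumber() + ','
        + endFeature.getFirstSampleNumber() + ')');
  }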
Example #2
  /**
   * Returns the string of words for this token, each with the starting sample number as the
   * timestamp. This method assumes that the word tokens come after the unit and HMM tokens.
   *
   * @param token the token from which to trace back the word path
   * @param wantFiller true if filler words should be included, false otherwise
   * @return the string of words, each with the starting sample number
   */
  private String getTimedWordTokenLastPath(Token token, boolean wantFiller) {
    StringBuilder sb = new StringBuilder();
    Word word = null;
    Data lastFeature = null;
    Data lastWordFirstFeature = null;

    while (token != null) {
      if (token.isWord()) {
        if (word != null && lastFeature != null) {
          if (wantFiller || !word.isFiller()) {
            addWord(sb, word, (FloatData) lastFeature, (FloatData) lastWordFirstFeature);
          }
          lastWordFirstFeature = lastFeature;
        }
        word = token.getWord();
      }
      Data feature = token.getData();
      if (feature != null) {
        lastFeature = feature;
        if (lastWordFirstFeature == null) {
          lastWordFirstFeature = lastFeature;
        }
      }
      token = token.getPredecessor();
    }

    return sb.toString();
  }
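
The two variants exist because different search graphs emit the word token either before or after the unit and HMM tokens it spans, so a caller picks the traversal that matches its graph. A hypothetical convenience overload illustrating that choice (the wordTokenFirst flag is illustrative and not part of the example above):

  // Hedged sketch: dispatch to whichever traversal matches the token ordering of the
  // search graph. The "wordTokenFirst" parameter name is an assumption.
  private String getTimedWordPath(Token token, boolean wantFiller, boolean wordTokenFirst) {
    if (wordTokenFirst) {
      // word tokens precede their unit/HMM tokens
      return getTimedWordPath(token, wantFiller);
    }
    // word tokens follow their unit/HMM tokens
    return getTimedWordTokenLastPath(token, wantFiller);
  }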
    /**
     * Purges excess members: keeps at most maxFiller filler tokens, keeps only the
     * best-scoring paths per word (maxPathsPerWord), and truncates the list to
     * absoluteBeamWidth.
     *
     * @return a (possibly new) active list
     */
    public ActiveList purge() {
      int fillerCount = 0;
      Map<Word, Integer> countMap = new HashMap<Word, Integer>();
      Collections.sort(tokenList, Scoreable.COMPARATOR);
      // remove word duplicates
      for (Iterator<Token> i = tokenList.iterator(); i.hasNext(); ) {
        Token token = i.next();
        WordSearchState wordState = (WordSearchState) token.getSearchState();

        Word word = wordState.getPronunciation().getWord();

        // keep at most maxFiller filler tokens; prune lower-scoring fillers
        if (maxFiller > 0) {
          if (word.isFiller()) {
            if (fillerCount < maxFiller) {
              fillerCount++;
            } else {
              i.remove();
              continue;
            }
          }
        }

        if (maxPathsPerWord > 0) {
          Integer count = countMap.get(word);
          int c = count == null ? 0 : count;

          // The tokens are sorted best-first, so only the highest-scoring
          // maxPathsPerWord - 1 tokens for a given word are kept; later,
          // lower-scoring duplicates are removed.

          if (c < maxPathsPerWord - 1) {
            countMap.put(word, c + 1);
          } else {
            i.remove();
          }
        }
      }

      if (tokenList.size() > absoluteBeamWidth) {
        tokenList = tokenList.subList(0, absoluteBeamWidth);
      }

      return this;
    }
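
purge() relies on tokenList already being sorted best-first by Scoreable.COMPARATOR, so both the per-word counting and the final subList truncation keep the highest-scoring entries. The following self-contained sketch shows the same pattern on plain Java collections, using a hypothetical Entry record instead of Token and omitting the filler cap for brevity; it is an illustration of the idea, not Sphinx-4 code (requires java.util imports):

    // Hypothetical stand-in for Token: just a word label and a score.
    record Entry(String word, double score) {}

    static List<Entry> prune(List<Entry> entries, int maxPathsPerWord, int absoluteBeamWidth) {
      // Sort best-first, mirroring Collections.sort(tokenList, Scoreable.COMPARATOR).
      entries.sort(Comparator.comparingDouble(Entry::score).reversed());
      Map<String, Integer> perWord = new HashMap<>();
      List<Entry> kept = new ArrayList<>();
      for (Entry e : entries) {
        int c = perWord.getOrDefault(e.word(), 0);
        // Cap the paths kept per word (the method above keeps maxPathsPerWord - 1;
        // the simpler cap of maxPathsPerWord is used here).
        if (c >= maxPathsPerWord) {
          continue;
        }
        perWord.put(e.word(), c + 1);
        kept.add(e);
        // Absolute beam: everything past this point scores lower, so stop.
        if (kept.size() >= absoluteBeamWidth) {
          break;
        }
      }
      return kept;
    }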