Ejemplo n.º 1
0
  /**
   * Command-line driver: loads an allophone set and a letter-to-sound model, then reads words from
   * standard input (one per line) and prints the predicted, syllabified pronunciation of each word
   * with the characters {@code -}, {@code '} and space removed.
   *
   * <p>Usage: {@code java marytts.modules.phonemiser.TrainedLTS allophones.xml lts-model.lts
   * [removeTrailingOneFromPhones]}
   *
   * @param args {@code args[0]} is the allophone file, {@code args[1]} is the LTS model file, and
   *     the optional {@code args[2]} is parsed as a boolean ("true", case-insensitive, enables the
   *     flag; anything else disables it). When {@code args[2]} is absent the flag defaults to
   *     {@code true}.
   * @throws IOException if the model file cannot be opened or standard input cannot be read
   * @throws MaryConfigurationException declared by the original signature; propagated from the
   *     project classes used here
   */
  public static void main(String[] args) throws IOException, MaryConfigurationException {

    if (args.length < 2) {
      System.out.println("Usage:");
      System.out.println(
          "java marytts.modules.phonemiser.TrainedLTS allophones.xml lts-model.lts [removeTrailingOneFromPhones]");
      System.exit(0);
    }
    String allophoneFile = args[0];
    String ltsFile = args[1];
    boolean myRemoveTrailingOneFromPhones = true;
    if (args.length > 2) {
      // Boolean.getBoolean(String) looks up a *system property* with that name, so it would
      // ignore the value actually typed on the command line. parseBoolean parses the argument.
      myRemoveTrailingOneFromPhones = Boolean.parseBoolean(args[2]);
    }

    // Fetch the allophone set once and share it between the LTS model and the syllabifier.
    AllophoneSet allophoneSet = AllophoneSet.getAllophoneSet(allophoneFile);

    // Keep the model stream open for the whole run and close it on every exit path.
    try (FileInputStream ltsStream = new FileInputStream(ltsFile)) {
      TrainedLTS lts =
          new TrainedLTS(
              allophoneSet,
              ltsStream,
              myRemoveTrailingOneFromPhones,
              new Syllabifier(allophoneSet, myRemoveTrailingOneFromPhones));

      BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
      String line;
      while ((line = br.readLine()) != null) {
        line = line.trim();
        String pron = lts.predictPronunciation(line);
        String syl = lts.syllabify(pron);
        // Strip syllable separators, stress marks and spaces from the printed form.
        String sylStripped = syl.replaceAll("[-' ]+", "");
        System.out.println(sylStripped);
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Phonemise the word text. This starts with a user dictionary and lexicon lookup, retries the
   * lookups on a Unicode-normalised spelling if that differs from the input, and finally applies
   * letter-to-sound rules if nothing else was successful.
   *
   * @param text the textual (graphemic) form of a word.
   * @param pos the part-of-speech of the word
   * @param g2pMethod output parameter used to return a second String via a StringBuilder. If a
   *     phonemisation of the text is found, the method of phonemisation is appended to it:
   *     "userdict", "lexicon" (also used for hits in the secondary lexicon) or "rules".
   * @return a phonemisation of the text if one can be generated, or null if no phonemisation method
   *     was successful.
   */
  public String phonemise(String text, String pos, StringBuilder g2pMethod) {
    // First, try a simple userdict and lexicon lookup:

    String result = userdictLookup(text, pos);
    if (result != null) {
      g2pMethod.append("userdict");
      return result;
    }

    result = lexiconLookup(text, pos);
    if (result != null) {
      g2pMethod.append("lexicon");
      return result;
    }

    // HB 150915 adding secondary lexicon
    result = secondary_lexiconLookup(text, pos);
    if (result != null) {
      g2pMethod.append("lexicon");
      return result;
    }

    // Lookup attempts failed. Try normalising exotic letters
    // (diacritics on vowels, etc.), look up again:
    String normalised = MaryUtils.normaliseUnicodeLetters(text, getLocale());
    if (!normalised.equals(text)) {
      result = userdictLookup(normalised, pos);
      if (result != null) {
        g2pMethod.append("userdict");
        return result;
      }
      result = lexiconLookup(normalised, pos);
      if (result != null) {
        g2pMethod.append("lexicon");
        return result;
      }
      // HB 150915 adding secondary lexicon.
      // Look up the normalised form here: the raw text was already tried against the
      // secondary lexicon above, so repeating that lookup could never find anything new.
      result = secondary_lexiconLookup(normalised, pos);
      if (result != null) {
        g2pMethod.append("lexicon");
        return result;
      }
    }

    // Cannot find it in the lexicon -- apply letter-to-sound rules.
    // NOTE(review): an earlier comment said the rules are applied "to the normalised form", but
    // the raw text is what is passed here -- confirm which is intended.

    String phones = lts.predictPronunciation(text);
    try {
      result = lts.syllabify(phones);
    } catch (IllegalArgumentException e) {
      // Syllabification failed: log it and fall through so the method returns null.
      logger.error(String.format("Problem with token <%s> [%s]: %s", text, phones, e.getMessage()));
    }
    if (result != null) {
      g2pMethod.append("rules");
      return result;
    }

    return null;
  }