/**
 * Command-line driver: loads an allophone set and a trained letter-to-sound model, then reads
 * words from standard input (one per line) and prints a predicted transcription for each.
 *
 * <p>Each input line is trimmed, passed through {@code predictPronunciation} and
 * {@code syllabify}, and every {@code '-'}, {@code '\''} and space character is removed from
 * the result before it is printed (presumably the syllable-boundary and stress markers --
 * confirm against the syllabifier's output format).
 *
 * @param args {@code args[0]}: allophone set file; {@code args[1]}: LTS model file;
 *     {@code args[2]} (optional): {@code "true"} or {@code "false"} for
 *     removeTrailingOneFromPhones, defaulting to {@code true} when omitted
 * @throws IOException if the model file or standard input cannot be read
 * @throws MaryConfigurationException declared for the allophone set / model loading calls
 */
public static void main(String[] args) throws IOException, MaryConfigurationException {
    if (args.length < 2) {
        System.out.println("Usage:");
        System.out.println(
                "java marytts.modules.phonemiser.TrainedLTS allophones.xml lts-model.lts [removeTrailingOneFromPhones]");
        System.exit(0);
    }
    String allophoneFile = args[0];
    String ltsFile = args[1];

    boolean myRemoveTrailingOneFromPhones = true;
    if (args.length > 2) {
        // Parse the argument text itself. Boolean.getBoolean() would instead look up a JVM
        // system property *named* args[2], which is practically always unset (=> false).
        myRemoveTrailingOneFromPhones = Boolean.parseBoolean(args[2]);
    }

    // Load the allophone set once and share it between the LTS model and the syllabifier.
    AllophoneSet allophoneSet = AllophoneSet.getAllophoneSet(allophoneFile);

    // Keep the model stream open for as long as the model is in use, and close it reliably
    // afterwards (also when loading or prediction throws).
    try (FileInputStream ltsStream = new FileInputStream(ltsFile)) {
        TrainedLTS lts = new TrainedLTS(
                allophoneSet,
                ltsStream,
                myRemoveTrailingOneFromPhones,
                new Syllabifier(allophoneSet, myRemoveTrailingOneFromPhones));

        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            String pron = lts.predictPronunciation(line);
            String syl = lts.syllabify(pron);
            String sylStripped = syl.replaceAll("[-' ]+", "");
            System.out.println(sylStripped);
        }
    }
}
/** * Phonemise the word text. This starts with a simple lexicon lookup, followed by some heuristics, * and finally applies letter-to-sound rules if nothing else was successful. * * @param text the textual (graphemic) form of a word. * @param pos the part-of-speech of the word * @param g2pMethod This is an awkward way to return a second String parameter via a * StringBuilder. If a phonemisation of the text is found, this parameter will be filled with * the method of phonemisation ("lexicon", ... "rules"). * @return a phonemisation of the text if one can be generated, or null if no phonemisation method * was successful. */ public String phonemise(String text, String pos, StringBuilder g2pMethod) { // First, try a simple userdict and lexicon lookup: String result = userdictLookup(text, pos); if (result != null) { g2pMethod.append("userdict"); return result; } result = lexiconLookup(text, pos); if (result != null) { g2pMethod.append("lexicon"); return result; } // HB 150915 adding secondary lexicon result = secondary_lexiconLookup(text, pos); if (result != null) { g2pMethod.append("lexicon"); return result; } // Lookup attempts failed. 
Try normalising exotic letters // (diacritics on vowels, etc.), look up again: String normalised = MaryUtils.normaliseUnicodeLetters(text, getLocale()); if (!normalised.equals(text)) { result = userdictLookup(normalised, pos); if (result != null) { g2pMethod.append("userdict"); return result; } result = lexiconLookup(normalised, pos); if (result != null) { g2pMethod.append("lexicon"); return result; } // HB 150915 adding secondary lexicon result = secondary_lexiconLookup(text, pos); if (result != null) { g2pMethod.append("lexicon"); return result; } } // Cannot find it in the lexicon -- apply letter-to-sound rules // to the normalised form String phones = lts.predictPronunciation(text); try { result = lts.syllabify(phones); } catch (IllegalArgumentException e) { logger.error(String.format("Problem with token <%s> [%s]: %s", text, phones, e.getMessage())); } if (result != null) { g2pMethod.append("rules"); return result; } return null; }