/**
 * Command-line entry point. Loads an allophone set and a letter-to-sound model, then reads one
 * word per line from standard input and prints its predicted, syllabified pronunciation with
 * syllable boundaries ({@code -}), stress markers ({@code '}) and spaces removed.
 *
 * @param args {@code allophones.xml lts-model.lts [removeTrailingOneFromPhones]}; the optional
 *     third argument is parsed as a boolean ("true", case-insensitive, means {@code true};
 *     anything else means {@code false}) and defaults to {@code true} when absent
 * @throws IOException if the model file or standard input cannot be read
 * @throws MaryConfigurationException propagated from the MaryTTS loading/construction calls
 */
public static void main(String[] args) throws IOException, MaryConfigurationException {
  if (args.length < 2) {
    System.out.println("Usage:");
    System.out.println(
        "java marytts.modules.phonemiser.TrainedLTS allophones.xml lts-model.lts [removeTrailingOneFromPhones]");
    System.exit(0);
  }
  String allophoneFile = args[0];
  String ltsFile = args[1];

  boolean myRemoveTrailingOneFromPhones = true;
  if (args.length > 2) {
    // Boolean.getBoolean() would look up a *system property* with this name instead of parsing
    // the argument text, so the flag could never be set from the command line.
    myRemoveTrailingOneFromPhones = Boolean.parseBoolean(args[2]);
  }

  // Load the allophone set once and share it between the LTS model and the syllabifier.
  AllophoneSet allophones = AllophoneSet.getAllophoneSet(allophoneFile);

  // Both streams stay open for the whole run and are closed on every exit path.
  try (FileInputStream ltsStream = new FileInputStream(ltsFile);
      BufferedReader br = new BufferedReader(new InputStreamReader(System.in))) {
    TrainedLTS lts =
        new TrainedLTS(
            allophones,
            ltsStream,
            myRemoveTrailingOneFromPhones,
            new Syllabifier(allophones, myRemoveTrailingOneFromPhones));

    String line;
    while ((line = br.readLine()) != null) {
      line = line.trim();
      String pron = lts.predictPronunciation(line);
      String syl = lts.syllabify(pron);
      // Strip syllable boundaries, stress markers and spaces from the printed result.
      String sylStripped = syl.replaceAll("[-' ]+", "");
      System.out.println(sylStripped);
    }
  }
}
public String predictPronunciation(String graphemes) { if (convertToLowercase) graphemes = graphemes.toLowerCase(allophoneSet.getLocale()); String returnStr = ""; for (int i = 0; i < graphemes.length(); i++) { byte[] byteFeatures = new byte[2 * this.context + 1]; for (int fnr = 0; fnr < 2 * this.context + 1; fnr++) { int pos = i - context + fnr; String grAtPos = (pos < 0 || pos >= graphemes.length()) ? "null" : graphemes.substring(pos, pos + 1); try { byteFeatures[fnr] = this.tree.getFeatureDefinition().getFeatureValueAsByte(fnr, grAtPos); // ... can also try to call explicit: // features[fnr] = this.fd.getFeatureValueAsByte("att"+fnr, cg.substr(pos) } catch (IllegalArgumentException iae) { // Silently ignore unknown characters byteFeatures[fnr] = this.tree.getFeatureDefinition().getFeatureValueAsByte(fnr, "null"); } } FeatureVector fv = new FeatureVector(byteFeatures, new short[] {}, new float[] {}, 0); StringAndFloatLeafNode leaf = (StringAndFloatLeafNode) tree.interpretToNode(fv, 0); String prediction = leaf.mostProbableString(featureDefinition, indexPredictedFeature); returnStr += prediction.substring(1, prediction.length() - 1); } return returnStr; }
/** * Convert a phone string into a list of string representations of individual phones. The input * can use the suffix "1" to indicate stressed vowels. * * @param phoneString the phone string to split * @return a linked list of strings, each string representing an individual phone * @deprecated This duplicates (badly) {@link AllophoneSet#splitAllophoneString(String)}; use that * method instead. */ @Deprecated protected LinkedList<String> splitIntoAllophones(String phoneString) { LinkedList<String> phoneList = new LinkedList<String>(); for (int i = 0; i < phoneString.length(); i++) { // Try to cut off individual segments, // starting with the longest prefixes, // and allowing for a suffix "1" marking stress: String name = null; for (int j = 3; j >= 1; j--) { if (i + j <= phoneString.length()) { String candidate = phoneString.substring(i, i + j); try { allophoneSet.getAllophone(candidate); name = candidate; i += j - 1; // so that the next i++ goes beyond current phone break; } catch (IllegalArgumentException e) { // ignore } } } if (name != null) { phoneList.add(name); } } return phoneList; }
/**
 * Look up the Allophone for a phone symbol, optionally ignoring a trailing stress digit.
 *
 * <p>When {@code removeTrailingOneFromPhones} is set and {@code phone} ends with "1", the final
 * character is cut off and the remainder is looked up; in every other case {@code phone} is
 * looked up unchanged.
 *
 * @param phone the phone symbol, possibly carrying a trailing "1"
 * @deprecated Use {@link AllophoneSet#getAllophone(String)} instead
 * @return the result of {@code allophoneSet.getAllophone(...)} for the (possibly shortened)
 *     symbol
 */
@Deprecated
protected Allophone getAllophone(String phone) {
  String symbol = phone;
  if (this.removeTrailingOneFromPhones && phone.endsWith("1")) {
    // Strip the stress digit before the lookup.
    symbol = phone.substring(0, phone.length() - 1);
  }
  return allophoneSet.getAllophone(symbol);
}
/** * For those syllables containing a "1" character, remove that "1" character and add a stress * marker ' at the beginning of the syllable. * * @param phoneList phoneList */ protected void correctStressSymbol(LinkedList<String> phoneList) { boolean stressFound = false; ListIterator<String> it = phoneList.listIterator(0); while (it.hasNext()) { String s = it.next(); if (s.endsWith("1")) { if (this.removeTrailingOneFromPhones) { it.set(s.substring(0, s.length() - 1)); // delete "1" } if (!stressFound) { // Only add a stress marker for first occurrence of "1": // Search backwards for syllable boundary or beginning of word: int steps = 0; while (it.hasPrevious()) { steps++; String t = it.previous(); if (t.equals("-") || t.equals("_")) { // syllable boundary it.next(); steps--; break; } } it.add("'"); while (steps > 0) { it.next(); steps--; } stressFound = true; } } } // No stressed vowel in word? if (!stressFound) { // Stress first non-schwa syllable it = phoneList.listIterator(0); while (it.hasNext()) { String s = it.next(); // HB there's a problem here, not sure why, but s can be "-" in some circumstances and we // get // java.lang.IllegalArgumentException: Allophone `-' could not be found in AllophoneSet // `sampa' (Locale: sv) // in that case if (s == "-") { System.err.println("Problem with -"); break; } Allophone ph = allophoneSet.getAllophone(s); if (ph.sonority() >= 5) { // non-schwa vowel // Search backwards for syllable boundary or beginning of word: int steps = 0; while (it.hasPrevious()) { steps++; String t = it.previous(); if (t.equals("-") || t.equals("_")) { // syllable boundary it.next(); steps--; break; } } it.add("'"); while (steps > 0) { it.next(); steps--; } break; // OK, that's it. } } } }