Example #1
0
  /**
   * Command-line entry point: loads an allophone set and a trained letter-to-sound model, then
   * reads one word per line from standard input and prints, for each word, the predicted and
   * syllabified pronunciation with all {@code -}, {@code '} and space characters removed.
   *
   * <p>If fewer than two arguments are given, a usage message is printed and the JVM exits.
   *
   * @param args {@code allophones.xml lts-model.lts [removeTrailingOneFromPhones]}; the optional
   *     third argument is parsed as a boolean ({@code "true"}, case-insensitive, means true) and
   *     defaults to {@code true} when absent
   * @throws IOException if the model file cannot be opened or closed, or reading standard input
   *     fails
   * @throws MaryConfigurationException presumably raised by the allophone-set / model loading
   *     calls when the given files are not usable (declared by the original signature)
   */
  public static void main(String[] args) throws IOException, MaryConfigurationException {

    if (args.length < 2) {
      System.out.println("Usage:");
      System.out.println(
          "java marytts.modules.phonemiser.TrainedLTS allophones.xml lts-model.lts [removeTrailingOneFromPhones]");
      System.exit(0);
    }
    String allophoneFile = args[0];
    String ltsFile = args[1];
    boolean myRemoveTrailingOneFromPhones = true;
    if (args.length > 2) {
      // parseBoolean interprets the argument text itself. The previously used
      // Boolean.getBoolean looks up a JVM system property with that name, so passing
      // "true" on the command line effectively always produced false.
      myRemoveTrailingOneFromPhones = Boolean.parseBoolean(args[2]);
    }

    // Load the allophone set once and share it between the LTS model and its syllabifier.
    AllophoneSet allophones = AllophoneSet.getAllophoneSet(allophoneFile);

    // Keep the model stream open for the whole session and close it reliably on exit.
    try (FileInputStream ltsStream = new FileInputStream(ltsFile)) {
      TrainedLTS lts =
          new TrainedLTS(
              allophones,
              ltsStream,
              myRemoveTrailingOneFromPhones,
              new Syllabifier(allophones, myRemoveTrailingOneFromPhones));

      BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
      String line;
      while ((line = br.readLine()) != null) {
        line = line.trim();
        String pron = lts.predictPronunciation(line);
        String syl = lts.syllabify(pron);
        // Strip syllable boundaries, stress markers and spaces from the printed result.
        String sylStripped = syl.replaceAll("[-' ]+", "");
        System.out.println(sylStripped);
      }
    }
  }
Example #2
0
  /**
   * Predict a phone string for the given grapheme string.
   *
   * <p>For every character position, a window of {@code 2 * context + 1} single-character
   * features centred on that position is built. Positions outside the string, and characters the
   * tree's feature definition rejects, are encoded with the feature value {@code "null"}. The
   * window is passed through the tree, and the most probable string of the resulting leaf, minus
   * its first and last character, is appended to the result.
   *
   * @param graphemes the grapheme string; lower-cased with the allophone set's locale first when
   *     {@code convertToLowercase} is set
   * @return the concatenation of the per-character predictions; empty for an empty input
   */
  public String predictPronunciation(String graphemes) {
    if (convertToLowercase) {
      graphemes = graphemes.toLowerCase(allophoneSet.getLocale());
    }

    final int windowSize = 2 * this.context + 1;
    final int graphemeCount = graphemes.length();

    // Accumulate in a builder: repeated String concatenation in the loop is quadratic.
    StringBuilder pronunciation = new StringBuilder();

    for (int i = 0; i < graphemeCount; i++) {

      byte[] byteFeatures = new byte[windowSize];

      for (int fnr = 0; fnr < windowSize; fnr++) {
        int pos = i - context + fnr;

        // Positions before the start or after the end of the word are padded with "null".
        String grAtPos =
            (pos < 0 || pos >= graphemeCount) ? "null" : graphemes.substring(pos, pos + 1);

        try {
          byteFeatures[fnr] = this.tree.getFeatureDefinition().getFeatureValueAsByte(fnr, grAtPos);
        } catch (IllegalArgumentException ignored) {
          // Deliberate best effort: a character unknown to the feature definition is
          // treated like padding instead of aborting the whole prediction.
          byteFeatures[fnr] = this.tree.getFeatureDefinition().getFeatureValueAsByte(fnr, "null");
        }
      }

      FeatureVector fv = new FeatureVector(byteFeatures, new short[] {}, new float[] {}, 0);

      StringAndFloatLeafNode leaf = (StringAndFloatLeafNode) tree.interpretToNode(fv, 0);
      String prediction = leaf.mostProbableString(featureDefinition, indexPredictedFeature);
      // Drop the first and last character of the leaf string (presumably delimiters around
      // the predicted phones -- confirm against the model format).
      pronunciation.append(prediction.substring(1, prediction.length() - 1));
    }

    return pronunciation.toString();
  }
 /**
  * Split a phone string into the string representations of its individual phones.
  *
  * <p>At each position the longest prefix of at most three characters that the allophone set
  * recognises is taken as the next phone. A character at which no prefix is recognised is
  * skipped silently.
  *
  * <p>NOTE(review): candidates are looked up directly in the allophone set, so whether a trailing
  * "1" stress suffix is accepted depends on {@code AllophoneSet.getAllophone} -- confirm.
  *
  * @param phoneString the phone string to split
  * @return a linked list of strings, each string representing an individual phone
  * @deprecated This duplicates (badly) {@link AllophoneSet#splitAllophoneString(String)}; use that
  *     method instead.
  */
 @Deprecated
 protected LinkedList<String> splitIntoAllophones(String phoneString) {
   LinkedList<String> phones = new LinkedList<String>();
   final int total = phoneString.length();
   int cursor = 0;
   while (cursor < total) {
     // Length of the phone recognised at the cursor; 0 while nothing has matched yet.
     int matchedLength = 0;
     // Longest candidate first, so multi-character phones win over their prefixes.
     for (int width = 3; width >= 1 && matchedLength == 0; width--) {
       if (cursor + width > total) {
         continue; // candidate would run past the end of the input
       }
       String candidate = phoneString.substring(cursor, cursor + width);
       try {
         // Called only for its exception: an unknown symbol is rejected.
         allophoneSet.getAllophone(candidate);
         phones.add(candidate);
         matchedLength = width;
       } catch (IllegalArgumentException e) {
         // not a known phone; try a shorter candidate
       }
     }
     // Advance past the recognised phone, or past one unrecognised character.
     cursor += (matchedLength > 0) ? matchedLength : 1;
   }
   return phones;
 }
 /**
  * Look up the Allophone object for a phone symbol. When {@code removeTrailingOneFromPhones} is
  * set and the symbol ends with "1", that final "1" is dropped before the lookup.
  *
  * @param phone the phone symbol, optionally carrying a trailing "1"
  * @deprecated Use {@link AllophoneSet#getAllophone(String)} instead
  * @return the result of {@code allophoneSet.getAllophone} for the symbol without its trailing
  *     "1" if {@code removeTrailingOneFromPhones} is set and the symbol ends with "1"; for the
  *     unchanged symbol otherwise
  */
 @Deprecated
 protected Allophone getAllophone(String phone) {
   boolean dropTrailingOne = this.removeTrailingOneFromPhones && phone.endsWith("1");
   String symbol = dropTrailingOne ? phone.substring(0, phone.length() - 1) : phone;
   return allophoneSet.getAllophone(symbol);
 }
  /**
   * Convert "1"-suffix stress marking into a stress marker ' placed at the start of the stressed
   * syllable.
   *
   * <p>Every entry ending in "1" has that "1" removed when {@code removeTrailingOneFromPhones} is
   * set. Only the first such entry gets a stress marker, inserted at the beginning of its
   * syllable (directly after the preceding "-" or "_" entry, or at the start of the list).
   *
   * <p>If no entry ends in "1", the list is scanned from the start and the marker is inserted at
   * the beginning of the syllable of the first phone whose sonority is at least 5 (a non-schwa
   * vowel). That scan stops with a message on standard error as soon as a "-" entry is reached,
   * because "-" is not an allophone and cannot be looked up.
   *
   * @param phoneList phones and syllable boundary symbols of one word; modified in place
   */
  protected void correctStressSymbol(LinkedList<String> phoneList) {
    boolean stressFound = false;
    ListIterator<String> it = phoneList.listIterator(0);
    while (it.hasNext()) {
      String s = it.next();
      if (s.endsWith("1")) {
        if (this.removeTrailingOneFromPhones) {
          it.set(s.substring(0, s.length() - 1)); // delete "1"
        }
        if (!stressFound) {
          // Only add a stress marker for first occurrence of "1":
          insertStressMarkerAtSyllableStart(it);
          stressFound = true;
        }
      }
    }
    // No stressed vowel in word?
    if (!stressFound) {
      // Stress first non-schwa syllable
      it = phoneList.listIterator(0);
      while (it.hasNext()) {
        String s = it.next();

        // A syllable boundary is not an allophone; looking it up would throw
        // IllegalArgumentException. Compare by value: an identity check (==) only matches
        // when the entry happens to be the interned literal, and misses equal strings
        // created at runtime.
        if ("-".equals(s)) {
          System.err.println("Problem with -");
          break;
        }

        Allophone ph = allophoneSet.getAllophone(s);
        if (ph.sonority() >= 5) { // non-schwa vowel
          insertStressMarkerAtSyllableStart(it);
          break; // OK, that's it.
        }
      }
    }
  }

  /**
   * Insert a stress marker ' at the start of the syllable containing the element most recently
   * returned by {@code it.next()}, then move the iterator forward again so that it ends up just
   * after that element.
   *
   * @param it list iterator positioned directly after the phone whose syllable is to be stressed
   */
  private void insertStressMarkerAtSyllableStart(ListIterator<String> it) {
    // Search backwards for syllable boundary or beginning of word, counting the entries
    // passed so the cursor can be restored afterwards.
    int steps = 0;
    while (it.hasPrevious()) {
      steps++;
      String t = it.previous();
      if (t.equals("-") || t.equals("_")) { // syllable boundary
        // Step forward over the boundary again: the marker belongs after it.
        it.next();
        steps--;
        break;
      }
    }
    it.add("'");
    while (steps > 0) {
      it.next();
      steps--;
    }
  }