Beispiel #1
0
  /**
   * Predict F0 from the utterance model and apply to elements
   *
   * @param applyToElements elements to apply predicted F0s
   * @param um utterance model that contains the set of elements (phonemes) and state durations for
   *     generating F0.
   * @throws MaryConfigurationException if error generating F0 out of HMMs trees and pdfs.
   */
  private void predictAndSetF0(List<Element> applyToElements, HTSUttModel um)
      throws MaryConfigurationException {

    HTSModel m;
    try {
      String f0AttributeName = "f0";
      HTSParameterGeneration pdf2par = new HTSParameterGeneration();
      /* Once we have all the phone models Process UttModel */
      /* Generate sequence of speech parameter vectors, generate parameters out of sequence of pdf's */
      boolean debug = false; /* so it does not save the generated parameters. */
      /* this function generates features just for the trees and pdf that are not null in the HMM cart*/
      pdf2par.htsMaximumLikelihoodParameterGeneration(um, htsData);

      // (2) include the predicted values in  applicableElements (as it is done in Model)
      boolean voiced[] = pdf2par.getVoicedArray();
      int numVoiced = 0;
      // make sure that the number of applicable elements is the same as the predicted number of
      // elements
      assert applyToElements.size() == um.getNumModel();
      float f0;
      String formattedTargetValue;
      int t = 0;
      for (int i = 0;
          i < applyToElements.size();
          i++) { // this will be the same as the utterance model set
        m = um.getUttModel(i);
        int k = 1;
        int numVoicedInModel = m.getNumVoiced();
        formattedTargetValue = "";
        // System.out.format("phone = %s dur_in_frames=%d  num_voiced_frames=%d : ",
        // m.getPhoneName(), m.getTotalDur(), numVoicedInModel);
        for (int mstate = 0; mstate < cart.getNumStates(); mstate++) {
          for (int frame = 0; frame < m.getDur(mstate); frame++) {
            if (voiced[
                t++]) { // numVoiced and t are not the same because voiced values can be true or
                        // false, numVoiced count just the voiced
              f0 = (float) Math.exp(pdf2par.getlf0Pst().getPar(numVoiced++, 0));
              formattedTargetValue +=
                  "("
                      + Integer.toString((int) ((k * 100.0) / numVoicedInModel))
                      + ","
                      + Integer.toString((int) f0)
                      + ")";
              k++;
            }
          }
        }
        Element element = applyToElements.get(i);
        // "evaluate" pseudo XPath syntax:
        // TODO this needs to be extended to take into account targetAttributeNames like "foo/@bar",
        // which would add the
        // bar attribute to the foo child of this element, creating the child if not already
        // present...
        if (f0AttributeName.startsWith("@")) {
          f0AttributeName = f0AttributeName.replaceFirst("@", "");
        }
        // format targetValue according to targetAttributeFormat
        // String formattedTargetValue = String.format(targetAttributeFormat, targetValue);
        // set the new attribute value:
        // if the whole segment is unvoiced then f0 should not be fixed?
        if (formattedTargetValue.length() > 0)
          element.setAttribute(f0AttributeName, formattedTargetValue);
        // System.out.println(formattedTargetValue);
      }
      // once finished re-set to null um
      // um = null;

    } catch (Exception e) {
      throw new MaryConfigurationException("Error generating F0 out of HMMs trees and pdfs. ", e);
    }
  }
Beispiel #2
0
  /**
   * Create a utterance model list from feature vectors predicted from elements.
   *
   * @param predictFromElements elements from MaryXML from where to get feature vectors.
   * @return Utterance model um containing state durations and pdfs already searched on the trees to
   *     generate F0.
   * @throws MaryConfigurationException if error searching in HMM trees.
   */
  private HTSUttModel createUttModel(List<Element> predictFromElements)
      throws MaryConfigurationException {
    int i, k, s, t, mstate, frame, durInFrames, durStateInFrames, numVoicedInModel;
    HTSModel m;
    List<Element> predictorElements = predictFromElements;
    List<Target> predictorTargets = getTargets(predictorElements);
    FeatureVector fv;
    HTSUttModel um = new HTSUttModel();
    FeatureDefinition feaDef = htsData.getFeatureDefinition();
    float duration;
    double diffdurOld = 0.0;
    double diffdurNew = 0.0;
    float f0s[] = null;
    try {
      // (1) Predict the values
      for (i = 0; i < predictorTargets.size(); i++) {
        fv = predictorTargets.get(i).getFeatureVector();
        Element e = predictFromElements.get(i);
        um.addUttModel(new HTSModel(cart.getNumStates()));
        m = um.getUttModel(i);
        /* this function also sets the phone name, the phone between - and + */
        m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef));
        /* Check if context-dependent gv (gv without sil) */
        if (htsData.getUseContextDependentGV()) {
          if (m.getPhoneName().contentEquals("_")) m.setGvSwitch(false);
        }
        /* increment number of models in utterance model */
        um.setNumModel(um.getNumModel() + 1);
        /* update number of states */
        um.setNumState(um.getNumState() + cart.getNumStates());
        // get the duration from the element
        duration = Integer.parseInt(e.getAttribute("d")) * 0.001f; // in sec.
        // distribute the duration (in frames) among the five states, here it is done the same
        // amount for each state
        durInFrames = (int) (duration / fperiodsec);
        durStateInFrames = (int) (durInFrames / cart.getNumStates());
        m.setTotalDur(0); // reset to set new value according to duration
        for (s = 0; s < cart.getNumStates(); s++) {
          m.setDur(s, durStateInFrames);
          m.setTotalDur(m.getTotalDur() + m.getDur(s));
        }
        um.setTotalFrame(um.getTotalFrame() + m.getTotalDur());
        System.out.format(
            "createUttModel: duration=%.3f sec. durInFrames=%d  durStateInFrames=%d  m.getTotalDur()=%d\n",
            duration, durInFrames, durStateInFrames, m.getTotalDur());

        /* Find pdf for LF0, this function sets the pdf for each state.
         * and determines, according to the HMM models, whether the states are voiced or unvoiced, (it can be possible that some states are voiced
         * and some unvoiced).*/
        cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV());
        for (mstate = 0; mstate < cart.getNumStates(); mstate++) {
          for (frame = 0; frame < m.getDur(mstate); frame++)
            if (m.getVoiced(mstate)) um.setLf0Frame(um.getLf0Frame() + 1);
        }
      }
      return um;
    } catch (Exception e) {
      throw new MaryConfigurationException(
          "Error searching in tree when creating utterance model. ", e);
    }
  }
Beispiel #3
0
  /**
   * Predict durations and state durations from predictFromElements and apply durations to
   * applyToElements. A utterance model is created that contains the predicted state durations.
   *
   * @param predictFromElements elements to predict from
   * @param applyToElements elements to apply predicted durations
   * @return HTSUttModel a utterance model
   * @throws MaryConfigurationException if error searching in HMM trees.
   */
  private HTSUttModel predictAndSetDuration(
      List<Element> predictFromElements, List<Element> applyToElements)
      throws MaryConfigurationException {
    List<Element> predictorElements = predictFromElements;
    List<Target> predictorTargets = getTargets(predictorElements);
    FeatureVector fv = null;
    HTSUttModel um = new HTSUttModel();
    FeatureDefinition feaDef = htsData.getFeatureDefinition();
    double diffdurOld = 0.0;
    double diffdurNew = 0.0;
    String durAttributeName = "d";
    try {
      // (1) Predict the values
      for (int i = 0; i < predictorTargets.size(); i++) {
        fv = predictorTargets.get(i).getFeatureVector();
        um.addUttModel(new HTSModel(cart.getNumStates()));
        HTSModel m = um.getUttModel(i);
        /* this function also sets the phone name, the phone between - and + */
        m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef));

        /* Check if context-dependent gv (gv without sil) */
        if (htsData.getUseContextDependentGV()) {
          if (m.getPhoneName().contentEquals("_")) m.setGvSwitch(false);
        }
        /* increment number of models in utterance model */
        um.setNumModel(um.getNumModel() + 1);
        /* update number of states */
        um.setNumState(um.getNumState() + cart.getNumStates());

        // Estimate state duration from state duration model (Gaussian)
        diffdurNew = cart.searchDurInCartTree(m, fv, htsData, diffdurOld);
        diffdurOld = diffdurNew;
        double duration = m.getTotalDur() * fperiodsec; // in seconds

        um.setTotalFrame(um.getTotalFrame() + m.getTotalDur());
        // System.out.format("HMMModel: phone=%s  duration=%.3f sec. m.getTotalDur()=%d\n",
        // m.getPhoneName(), duration, m.getTotalDur());

        /* Find pdf for LF0, this function sets the pdf for each state.
         * and determines, according to the HMM models, whether the states are voiced or unvoiced, (it can be possible that some states are voiced
         * and some unvoiced).*/
        cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV());
        for (int mstate = 0; mstate < cart.getNumStates(); mstate++) {
          for (int frame = 0; frame < m.getDur(mstate); frame++)
            if (m.getVoiced(mstate)) um.setLf0Frame(um.getLf0Frame() + 1);
        }

        // set the value in elements
        Element element = applyToElements.get(i);
        // "evaluate" pseudo XPath syntax:
        // TODO this needs to be extended to take into account targetAttributeNames like "foo/@bar",
        // which would add the
        // bar attribute to the foo child of this element, creating the child if not already
        // present...
        if (durAttributeName.startsWith("@")) {
          durAttributeName = durAttributeName.replaceFirst("@", "");
        }
        String formattedTargetValue = String.format(targetAttributeFormat, duration);

        // System.out.println("HMMModel: formattedTargetValue = " + formattedTargetValue);

        // if the attribute already exists for this element, append targetValue:
        if (element.hasAttribute(durAttributeName)) {
          formattedTargetValue =
              element.getAttribute(durAttributeName) + " " + formattedTargetValue;
        }

        // set the new attribute value:
        element.setAttribute(durAttributeName, formattedTargetValue);
      }
      return um;
    } catch (Exception e) {
      throw new MaryConfigurationException("Error searching in tree when predicting duration. ", e);
    }
  }