示例#1
0
  /**
   * Builds an utterance model whose state durations come from the durations already stored on the
   * given elements, and looks up the LF0 pdfs for every model.
   *
   * <p>For each target obtained from {@code predictFromElements} a new {@code HTSModel} is appended
   * to the utterance model. The model's phone name is taken from the {@code "phone"} feature, its
   * GV switch is turned off for the phone {@code "_"} when context-dependent GV is enabled, and its
   * duration is read from the element's {@code "d"} attribute. That attribute is parsed as an
   * integer and scaled by 0.001 to seconds, converted to frames with {@code fperiodsec}, and split
   * evenly across the states using integer division (any remainder frames are dropped).
   *
   * @param predictFromElements elements from MaryXML from where to get feature vectors; each one
   *     must carry an integer {@code "d"} attribute.
   * @return utterance model containing the state durations, the total frame count, the number of
   *     voiced (LF0) frames and the LF0 pdfs found in the trees.
   * @throws MaryConfigurationException if anything fails while building the model, e.g. an error
   *     searching in the HMM trees or a missing/non-integer {@code "d"} attribute; the original
   *     failure is attached as the cause.
   */
  private HTSUttModel createUttModel(List<Element> predictFromElements)
      throws MaryConfigurationException {
    List<Target> predictorTargets = getTargets(predictFromElements);
    HTSUttModel um = new HTSUttModel();
    FeatureDefinition feaDef = htsData.getFeatureDefinition();
    try {
      for (int i = 0; i < predictorTargets.size(); i++) {
        FeatureVector fv = predictorTargets.get(i).getFeatureVector();
        Element e = predictFromElements.get(i);
        final int numStates = cart.getNumStates();

        um.addUttModel(new HTSModel(numStates));
        HTSModel m = um.getUttModel(i);
        /* this function also sets the phone name, the phone between - and + */
        m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef));
        /* Check if context-dependent gv (gv without sil) */
        if (htsData.getUseContextDependentGV() && m.getPhoneName().contentEquals("_")) {
          m.setGvSwitch(false);
        }
        /* increment number of models in utterance model */
        um.setNumModel(um.getNumModel() + 1);
        /* update number of states */
        um.setNumState(um.getNumState() + numStates);

        // Get the duration from the element. A missing attribute yields "" from getAttribute, so
        // report which element is at fault instead of a bare NumberFormatException.
        String durAttr = e.getAttribute("d");
        int durInMillis;
        try {
          durInMillis = Integer.parseInt(durAttr);
        } catch (NumberFormatException nfe) {
          throw new IllegalArgumentException(
              "Element at index "
                  + i
                  + " has a missing or non-integer duration attribute d=\""
                  + durAttr
                  + "\"",
              nfe);
        }
        float duration = durInMillis * 0.001f; // in sec.

        // Distribute the duration (in frames) among the states: every state gets the same amount,
        // the remainder of the integer division is discarded.
        int durInFrames = (int) (duration / fperiodsec);
        int durStateInFrames = (int) (durInFrames / numStates);
        m.setTotalDur(0); // reset to set new value according to duration
        for (int s = 0; s < numStates; s++) {
          m.setDur(s, durStateInFrames);
          m.setTotalDur(m.getTotalDur() + m.getDur(s));
        }
        um.setTotalFrame(um.getTotalFrame() + m.getTotalDur());
        // NOTE(review): debug trace written to stdout for every model; kept to preserve output.
        System.out.format(
            "createUttModel: duration=%.3f sec. durInFrames=%d  durStateInFrames=%d  m.getTotalDur()=%d\n",
            duration, durInFrames, durStateInFrames, m.getTotalDur());

        /* Find pdf for LF0, this function sets the pdf for each state
         * and determines, according to the HMM models, whether the states are voiced or unvoiced
         * (it is possible that some states are voiced and some unvoiced). */
        cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV());
        // Every frame of a voiced state counts as one LF0 frame.
        for (int state = 0; state < numStates; state++) {
          int stateDur = m.getDur(state);
          if (stateDur > 0 && m.getVoiced(state)) {
            um.setLf0Frame(um.getLf0Frame() + stateDur);
          }
        }
      }
      return um;
    } catch (Exception e) {
      throw new MaryConfigurationException(
          "Error searching in tree when creating utterance model. ", e);
    }
  }
示例#2
0
  /**
   * Predict durations and state durations from predictFromElements and apply durations to
   * applyToElements. An utterance model is created that contains the predicted state durations.
   *
   * <p>For each target obtained from {@code predictFromElements} a new {@code HTSModel} is appended
   * to the utterance model, its state durations are estimated with {@code
   * cart.searchDurInCartTree}, and its LF0 pdfs are looked up. The value returned by the duration
   * search for one model is passed into the search for the next model. The model's total duration
   * in seconds ({@code m.getTotalDur() * fperiodsec}) is formatted with {@code
   * targetAttributeFormat} and written to the {@code "d"} attribute of the element at the same
   * index in {@code applyToElements}; if that attribute already exists, the new value is appended
   * after a single space.
   *
   * @param predictFromElements elements to predict from
   * @param applyToElements elements to apply predicted durations; must contain at least as many
   *     elements as there are targets. This is checked before any element is modified.
   * @return HTSUttModel an utterance model
   * @throws MaryConfigurationException if anything fails while predicting, e.g. an error searching
   *     in the HMM trees or too few elements in {@code applyToElements}; the original failure is
   *     attached as the cause.
   */
  private HTSUttModel predictAndSetDuration(
      List<Element> predictFromElements, List<Element> applyToElements)
      throws MaryConfigurationException {
    List<Target> predictorTargets = getTargets(predictFromElements);
    HTSUttModel um = new HTSUttModel();
    FeatureDefinition feaDef = htsData.getFeatureDefinition();
    // TODO pseudo XPath names are not supported yet: this needs to be extended to take into
    // account targetAttributeNames like "foo/@bar", which would add the bar attribute to the foo
    // child of this element, creating the child if not already present...
    final String durAttributeName = "d";
    // Value returned by the previous duration search, fed into the next one.
    double diffdur = 0.0;
    try {
      final int numTargets = predictorTargets.size();
      // Fail before touching any element rather than after some have already been modified.
      if (numTargets > 0 && applyToElements.size() < numTargets) {
        throw new IllegalArgumentException(
            "applyToElements has "
                + applyToElements.size()
                + " elements but "
                + numTargets
                + " durations have to be applied");
      }

      // (1) Predict the values
      for (int i = 0; i < numTargets; i++) {
        FeatureVector fv = predictorTargets.get(i).getFeatureVector();
        final int numStates = cart.getNumStates();

        um.addUttModel(new HTSModel(numStates));
        HTSModel m = um.getUttModel(i);
        /* this function also sets the phone name, the phone between - and + */
        m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef));

        /* Check if context-dependent gv (gv without sil) */
        if (htsData.getUseContextDependentGV() && m.getPhoneName().contentEquals("_")) {
          m.setGvSwitch(false);
        }
        /* increment number of models in utterance model */
        um.setNumModel(um.getNumModel() + 1);
        /* update number of states */
        um.setNumState(um.getNumState() + numStates);

        // Estimate state duration from state duration model (Gaussian)
        diffdur = cart.searchDurInCartTree(m, fv, htsData, diffdur);
        double duration = m.getTotalDur() * fperiodsec; // in seconds

        um.setTotalFrame(um.getTotalFrame() + m.getTotalDur());

        /* Find pdf for LF0, this function sets the pdf for each state
         * and determines, according to the HMM models, whether the states are voiced or unvoiced
         * (it is possible that some states are voiced and some unvoiced). */
        cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV());
        // Every frame of a voiced state counts as one LF0 frame.
        for (int state = 0; state < numStates; state++) {
          int stateDur = m.getDur(state);
          if (stateDur > 0 && m.getVoiced(state)) {
            um.setLf0Frame(um.getLf0Frame() + stateDur);
          }
        }

        // set the value in elements
        Element element = applyToElements.get(i);
        String formattedTargetValue = String.format(targetAttributeFormat, duration);

        // if the attribute already exists for this element, append targetValue:
        if (element.hasAttribute(durAttributeName)) {
          formattedTargetValue =
              element.getAttribute(durAttributeName) + " " + formattedTargetValue;
        }

        // set the new attribute value:
        element.setAttribute(durAttributeName, formattedTargetValue);
      }
      return um;
    } catch (Exception e) {
      throw new MaryConfigurationException("Error searching in tree when predicting duration. ", e);
    }
  }