/** * Predict F0 from the utterance model and apply to elements * * @param applyToElements elements to apply predicted F0s * @param um utterance model that contains the set of elements (phonemes) and state durations for * generating F0. * @throws MaryConfigurationException if error generating F0 out of HMMs trees and pdfs. */ private void predictAndSetF0(List<Element> applyToElements, HTSUttModel um) throws MaryConfigurationException { HTSModel m; try { String f0AttributeName = "f0"; HTSParameterGeneration pdf2par = new HTSParameterGeneration(); /* Once we have all the phone models Process UttModel */ /* Generate sequence of speech parameter vectors, generate parameters out of sequence of pdf's */ boolean debug = false; /* so it does not save the generated parameters. */ /* this function generates features just for the trees and pdf that are not null in the HMM cart*/ pdf2par.htsMaximumLikelihoodParameterGeneration(um, htsData); // (2) include the predicted values in applicableElements (as it is done in Model) boolean voiced[] = pdf2par.getVoicedArray(); int numVoiced = 0; // make sure that the number of applicable elements is the same as the predicted number of // elements assert applyToElements.size() == um.getNumModel(); float f0; String formattedTargetValue; int t = 0; for (int i = 0; i < applyToElements.size(); i++) { // this will be the same as the utterance model set m = um.getUttModel(i); int k = 1; int numVoicedInModel = m.getNumVoiced(); formattedTargetValue = ""; // System.out.format("phone = %s dur_in_frames=%d num_voiced_frames=%d : ", // m.getPhoneName(), m.getTotalDur(), numVoicedInModel); for (int mstate = 0; mstate < cart.getNumStates(); mstate++) { for (int frame = 0; frame < m.getDur(mstate); frame++) { if (voiced[ t++]) { // numVoiced and t are not the same because voiced values can be true or // false, numVoiced count just the voiced f0 = (float) Math.exp(pdf2par.getlf0Pst().getPar(numVoiced++, 0)); formattedTargetValue += "(" + Integer.toString((int) ((k * 100.0) / numVoicedInModel)) + "," + Integer.toString((int) f0) + ")"; k++; } } } Element element = applyToElements.get(i); // "evaluate" pseudo XPath syntax: // TODO this needs to be extended to take into account targetAttributeNames like "foo/@bar", // which would add the // bar attribute to the foo child of this element, creating the child if not already // present... if (f0AttributeName.startsWith("@")) { f0AttributeName = f0AttributeName.replaceFirst("@", ""); } // format targetValue according to targetAttributeFormat // String formattedTargetValue = String.format(targetAttributeFormat, targetValue); // set the new attribute value: // if the whole segment is unvoiced then f0 should not be fixed? if (formattedTargetValue.length() > 0) element.setAttribute(f0AttributeName, formattedTargetValue); // System.out.println(formattedTargetValue); } // once finished re-set to null um // um = null; } catch (Exception e) { throw new MaryConfigurationException("Error generating F0 out of HMMs trees and pdfs. ", e); } }
/** * Create a utterance model list from feature vectors predicted from elements. * * @param predictFromElements elements from MaryXML from where to get feature vectors. * @return Utterance model um containing state durations and pdfs already searched on the trees to * generate F0. * @throws MaryConfigurationException if error searching in HMM trees. */ private HTSUttModel createUttModel(List<Element> predictFromElements) throws MaryConfigurationException { int i, k, s, t, mstate, frame, durInFrames, durStateInFrames, numVoicedInModel; HTSModel m; List<Element> predictorElements = predictFromElements; List<Target> predictorTargets = getTargets(predictorElements); FeatureVector fv; HTSUttModel um = new HTSUttModel(); FeatureDefinition feaDef = htsData.getFeatureDefinition(); float duration; double diffdurOld = 0.0; double diffdurNew = 0.0; float f0s[] = null; try { // (1) Predict the values for (i = 0; i < predictorTargets.size(); i++) { fv = predictorTargets.get(i).getFeatureVector(); Element e = predictFromElements.get(i); um.addUttModel(new HTSModel(cart.getNumStates())); m = um.getUttModel(i); /* this function also sets the phone name, the phone between - and + */ m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef)); /* Check if context-dependent gv (gv without sil) */ if (htsData.getUseContextDependentGV()) { if (m.getPhoneName().contentEquals("_")) m.setGvSwitch(false); } /* increment number of models in utterance model */ um.setNumModel(um.getNumModel() + 1); /* update number of states */ um.setNumState(um.getNumState() + cart.getNumStates()); // get the duration from the element duration = Integer.parseInt(e.getAttribute("d")) * 0.001f; // in sec. // distribute the duration (in frames) among the five states, here it is done the same // amount for each state durInFrames = (int) (duration / fperiodsec); durStateInFrames = (int) (durInFrames / cart.getNumStates()); m.setTotalDur(0); // reset to set new value according to duration for (s = 0; s < cart.getNumStates(); s++) { m.setDur(s, durStateInFrames); m.setTotalDur(m.getTotalDur() + m.getDur(s)); } um.setTotalFrame(um.getTotalFrame() + m.getTotalDur()); System.out.format( "createUttModel: duration=%.3f sec. durInFrames=%d durStateInFrames=%d m.getTotalDur()=%d\n", duration, durInFrames, durStateInFrames, m.getTotalDur()); /* Find pdf for LF0, this function sets the pdf for each state. * and determines, according to the HMM models, whether the states are voiced or unvoiced, (it can be possible that some states are voiced * and some unvoiced).*/ cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV()); for (mstate = 0; mstate < cart.getNumStates(); mstate++) { for (frame = 0; frame < m.getDur(mstate); frame++) if (m.getVoiced(mstate)) um.setLf0Frame(um.getLf0Frame() + 1); } } return um; } catch (Exception e) { throw new MaryConfigurationException( "Error searching in tree when creating utterance model. ", e); } }
/** * Predict durations and state durations from predictFromElements and apply durations to * applyToElements. A utterance model is created that contains the predicted state durations. * * @param predictFromElements elements to predict from * @param applyToElements elements to apply predicted durations * @return HTSUttModel a utterance model * @throws MaryConfigurationException if error searching in HMM trees. */ private HTSUttModel predictAndSetDuration( List<Element> predictFromElements, List<Element> applyToElements) throws MaryConfigurationException { List<Element> predictorElements = predictFromElements; List<Target> predictorTargets = getTargets(predictorElements); FeatureVector fv = null; HTSUttModel um = new HTSUttModel(); FeatureDefinition feaDef = htsData.getFeatureDefinition(); double diffdurOld = 0.0; double diffdurNew = 0.0; String durAttributeName = "d"; try { // (1) Predict the values for (int i = 0; i < predictorTargets.size(); i++) { fv = predictorTargets.get(i).getFeatureVector(); um.addUttModel(new HTSModel(cart.getNumStates())); HTSModel m = um.getUttModel(i); /* this function also sets the phone name, the phone between - and + */ m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef)); /* Check if context-dependent gv (gv without sil) */ if (htsData.getUseContextDependentGV()) { if (m.getPhoneName().contentEquals("_")) m.setGvSwitch(false); } /* increment number of models in utterance model */ um.setNumModel(um.getNumModel() + 1); /* update number of states */ um.setNumState(um.getNumState() + cart.getNumStates()); // Estimate state duration from state duration model (Gaussian) diffdurNew = cart.searchDurInCartTree(m, fv, htsData, diffdurOld); diffdurOld = diffdurNew; double duration = m.getTotalDur() * fperiodsec; // in seconds um.setTotalFrame(um.getTotalFrame() + m.getTotalDur()); // System.out.format("HMMModel: phone=%s duration=%.3f sec. m.getTotalDur()=%d\n", // m.getPhoneName(), duration, m.getTotalDur()); /* Find pdf for LF0, this function sets the pdf for each state. * and determines, according to the HMM models, whether the states are voiced or unvoiced, (it can be possible that some states are voiced * and some unvoiced).*/ cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV()); for (int mstate = 0; mstate < cart.getNumStates(); mstate++) { for (int frame = 0; frame < m.getDur(mstate); frame++) if (m.getVoiced(mstate)) um.setLf0Frame(um.getLf0Frame() + 1); } // set the value in elements Element element = applyToElements.get(i); // "evaluate" pseudo XPath syntax: // TODO this needs to be extended to take into account targetAttributeNames like "foo/@bar", // which would add the // bar attribute to the foo child of this element, creating the child if not already // present... if (durAttributeName.startsWith("@")) { durAttributeName = durAttributeName.replaceFirst("@", ""); } String formattedTargetValue = String.format(targetAttributeFormat, duration); // System.out.println("HMMModel: formattedTargetValue = " + formattedTargetValue); // if the attribute already exists for this element, append targetValue: if (element.hasAttribute(durAttributeName)) { formattedTargetValue = element.getAttribute(durAttributeName) + " " + formattedTargetValue; } // set the new attribute value: element.setAttribute(durAttributeName, formattedTargetValue); } return um; } catch (Exception e) { throw new MaryConfigurationException("Error searching in tree when predicting duration. ", e); } }