/**
 * Creates a TrainedLTS from an allophone set and an already built prediction tree.
 *
 * <p>The feature definition and the index of the predicted feature are taken from the tree.
 * Two settings are read from the tree's properties: {@code "lowercase"} (parsed with
 * {@code Boolean.parseBoolean}, so a missing value yields {@code false}) and {@code "context"}
 * (parsed as an integer).
 *
 * @param aPhonSet allophone set stored in this instance
 * @param predictionTree prediction tree; it must carry properties with a {@code "context"} entry
 * @throws IllegalArgumentException if the tree has no properties, or (as a
 *         {@code NumberFormatException}) if {@code "context"} is missing or not an integer
 */
public TrainedLTS(AllophoneSet aPhonSet, CART predictionTree) {
    this.allophoneSet = aPhonSet;
    this.tree = predictionTree;
    this.featureDefinition = tree.getFeatureDefinition();
    this.indexPredictedFeature = featureDefinition.getFeatureIndex(PREDICTED_STRING_FEATURENAME);

    Properties props = tree.getProperties();
    if (props == null) {
        throw new IllegalArgumentException("Prediction tree does not contain properties");
    }
    convertToLowercase = Boolean.parseBoolean(props.getProperty("lowercase"));

    String contextValue = props.getProperty("context");
    if (contextValue == null) {
        // Integer.parseInt(null) fails with a message that does not name the property;
        // keep the same exception type but say what is actually missing.
        throw new NumberFormatException("Prediction tree properties do not contain \"context\"");
    }
    context = Integer.parseInt(contextValue);
}
/**
 * Convenience method to load the prediction tree from an input stream.
 *
 * <p>Replaces this instance's tree, feature definition and predicted-feature index with those
 * of the loaded tree, then re-reads the {@code "lowercase"} and {@code "context"} settings from
 * the tree's properties.
 *
 * <p>NOTE: the tree-related fields are assigned before the properties are validated, so they
 * stay replaced (and {@code convertToLowercase} stays {@code false}) when this method throws
 * because the properties are missing.
 *
 * @param treeStream stream the tree is read from; this method itself does not close it
 * @throws IOException declared by this method; presumably raised by the reader on I/O failure
 * @throws MaryConfigurationException declared by this method; presumably raised by the reader
 *         when the stream content cannot be turned into a tree
 * @throws IllegalArgumentException if the loaded tree has no properties, or (as a
 *         {@code NumberFormatException}) if {@code "context"} is missing or not an integer
 */
public void loadTree(InputStream treeStream) throws IOException, MaryConfigurationException {
    MaryCARTReader cartReader = new MaryCARTReader();
    this.tree = cartReader.loadFromStream(treeStream);
    this.featureDefinition = tree.getFeatureDefinition();
    this.indexPredictedFeature = featureDefinition.getFeatureIndex(PREDICTED_STRING_FEATURENAME);
    // Reset first so that a failure below does not leave the previous tree's setting behind.
    this.convertToLowercase = false;

    Properties props = tree.getProperties();
    if (props == null) {
        throw new IllegalArgumentException("Prediction tree does not contain properties");
    }
    convertToLowercase = Boolean.parseBoolean(props.getProperty("lowercase"));

    String contextValue = props.getProperty("context");
    if (contextValue == null) {
        // Integer.parseInt(null) fails with a message that does not name the property;
        // keep the same exception type but say what is actually missing.
        throw new NumberFormatException("Prediction tree properties do not contain \"context\"");
    }
    context = Integer.parseInt(contextValue);
}
/** * Create a utterance model list from feature vectors predicted from elements. * * @param predictFromElements elements from MaryXML from where to get feature vectors. * @return Utterance model um containing state durations and pdfs already searched on the trees to * generate F0. * @throws MaryConfigurationException if error searching in HMM trees. */ private HTSUttModel createUttModel(List<Element> predictFromElements) throws MaryConfigurationException { int i, k, s, t, mstate, frame, durInFrames, durStateInFrames, numVoicedInModel; HTSModel m; List<Element> predictorElements = predictFromElements; List<Target> predictorTargets = getTargets(predictorElements); FeatureVector fv; HTSUttModel um = new HTSUttModel(); FeatureDefinition feaDef = htsData.getFeatureDefinition(); float duration; double diffdurOld = 0.0; double diffdurNew = 0.0; float f0s[] = null; try { // (1) Predict the values for (i = 0; i < predictorTargets.size(); i++) { fv = predictorTargets.get(i).getFeatureVector(); Element e = predictFromElements.get(i); um.addUttModel(new HTSModel(cart.getNumStates())); m = um.getUttModel(i); /* this function also sets the phone name, the phone between - and + */ m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef)); /* Check if context-dependent gv (gv without sil) */ if (htsData.getUseContextDependentGV()) { if (m.getPhoneName().contentEquals("_")) m.setGvSwitch(false); } /* increment number of models in utterance model */ um.setNumModel(um.getNumModel() + 1); /* update number of states */ um.setNumState(um.getNumState() + cart.getNumStates()); // get the duration from the element duration = Integer.parseInt(e.getAttribute("d")) * 0.001f; // in sec. 
// distribute the duration (in frames) among the five states, here it is done the same // amount for each state durInFrames = (int) (duration / fperiodsec); durStateInFrames = (int) (durInFrames / cart.getNumStates()); m.setTotalDur(0); // reset to set new value according to duration for (s = 0; s < cart.getNumStates(); s++) { m.setDur(s, durStateInFrames); m.setTotalDur(m.getTotalDur() + m.getDur(s)); } um.setTotalFrame(um.getTotalFrame() + m.getTotalDur()); System.out.format( "createUttModel: duration=%.3f sec. durInFrames=%d durStateInFrames=%d m.getTotalDur()=%d\n", duration, durInFrames, durStateInFrames, m.getTotalDur()); /* Find pdf for LF0, this function sets the pdf for each state. * and determines, according to the HMM models, whether the states are voiced or unvoiced, (it can be possible that some states are voiced * and some unvoiced).*/ cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV()); for (mstate = 0; mstate < cart.getNumStates(); mstate++) { for (frame = 0; frame < m.getDur(mstate); frame++) if (m.getVoiced(mstate)) um.setLf0Frame(um.getLf0Frame() + 1); } } return um; } catch (Exception e) { throw new MaryConfigurationException( "Error searching in tree when creating utterance model. ", e); } }
/** * Predict durations and state durations from predictFromElements and apply durations to * applyToElements. A utterance model is created that contains the predicted state durations. * * @param predictFromElements elements to predict from * @param applyToElements elements to apply predicted durations * @return HTSUttModel a utterance model * @throws MaryConfigurationException if error searching in HMM trees. */ private HTSUttModel predictAndSetDuration( List<Element> predictFromElements, List<Element> applyToElements) throws MaryConfigurationException { List<Element> predictorElements = predictFromElements; List<Target> predictorTargets = getTargets(predictorElements); FeatureVector fv = null; HTSUttModel um = new HTSUttModel(); FeatureDefinition feaDef = htsData.getFeatureDefinition(); double diffdurOld = 0.0; double diffdurNew = 0.0; String durAttributeName = "d"; try { // (1) Predict the values for (int i = 0; i < predictorTargets.size(); i++) { fv = predictorTargets.get(i).getFeatureVector(); um.addUttModel(new HTSModel(cart.getNumStates())); HTSModel m = um.getUttModel(i); /* this function also sets the phone name, the phone between - and + */ m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef)); /* Check if context-dependent gv (gv without sil) */ if (htsData.getUseContextDependentGV()) { if (m.getPhoneName().contentEquals("_")) m.setGvSwitch(false); } /* increment number of models in utterance model */ um.setNumModel(um.getNumModel() + 1); /* update number of states */ um.setNumState(um.getNumState() + cart.getNumStates()); // Estimate state duration from state duration model (Gaussian) diffdurNew = cart.searchDurInCartTree(m, fv, htsData, diffdurOld); diffdurOld = diffdurNew; double duration = m.getTotalDur() * fperiodsec; // in seconds um.setTotalFrame(um.getTotalFrame() + m.getTotalDur()); // System.out.format("HMMModel: phone=%s duration=%.3f sec. 
m.getTotalDur()=%d\n", // m.getPhoneName(), duration, m.getTotalDur()); /* Find pdf for LF0, this function sets the pdf for each state. * and determines, according to the HMM models, whether the states are voiced or unvoiced, (it can be possible that some states are voiced * and some unvoiced).*/ cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV()); for (int mstate = 0; mstate < cart.getNumStates(); mstate++) { for (int frame = 0; frame < m.getDur(mstate); frame++) if (m.getVoiced(mstate)) um.setLf0Frame(um.getLf0Frame() + 1); } // set the value in elements Element element = applyToElements.get(i); // "evaluate" pseudo XPath syntax: // TODO this needs to be extended to take into account targetAttributeNames like "foo/@bar", // which would add the // bar attribute to the foo child of this element, creating the child if not already // present... if (durAttributeName.startsWith("@")) { durAttributeName = durAttributeName.replaceFirst("@", ""); } String formattedTargetValue = String.format(targetAttributeFormat, duration); // System.out.println("HMMModel: formattedTargetValue = " + formattedTargetValue); // if the attribute already exists for this element, append targetValue: if (element.hasAttribute(durAttributeName)) { formattedTargetValue = element.getAttribute(durAttributeName) + " " + formattedTargetValue; } // set the new attribute value: element.setAttribute(durAttributeName, formattedTargetValue); } return um; } catch (Exception e) { throw new MaryConfigurationException("Error searching in tree when predicting duration. ", e); } }