private void loadMaryXmlF0(HTSUttModel um, HMMData htsData) throws Exception { logger.info("Using f0 from maryXML acoustparams"); int i, n, numVoiced; HTSModel m; double[] dval; double lastF0 = 0.0; numVoiced = 0; Vector<Double> f0Vector = new Vector<Double>(); for (i = 0; i < um.getNumUttModel(); i++) { m = um.getUttModel(i); // System.out.format("\nmodel=%s totalDur=%d numVoicedFrames=%d F0=%s\n", m.getPhoneName(), // m.getTotalDur(), // m.getNumVoiced(), m.getMaryXmlF0()); // get contour for this model if voiced frames and maryXml has f0 values dval = getContourSegment(m.getMaryXmlF0(), m.getNumVoiced()); // accumulate the values for (n = 0; n < dval.length; n++) f0Vector.add(dval[n]); } // interpolate values if necessary interpolateSegments(f0Vector); // create a new Lf0Pst with the values from maryXML HTSPStream newLf0Pst = new HTSPStream(3, f0Vector.size(), HMMData.FeatureType.LF0, htsData.getMaxLf0GvIter()); for (n = 0; n < f0Vector.size(); n++) newLf0Pst.setPar(n, 0, Math.log(f0Vector.get(n))); setlf0Pst(newLf0Pst); }
/**
 * Saves generated parameters in a headerless binary file of floats, written with
 * {@link DataOutputStream#writeFloat(float)}.
 * <ul>
 * <li>{@code LF0}: appends {@code ".f0"} to the file name and writes one float per entry of the {@code voiced}
 * array: {@code exp(par.getPar(i, 0))} for voiced frames (where {@code i} counts voiced frames only, because the
 * LF0 stream holds just the voiced frames) and {@code 0.0} for unvoiced frames.</li>
 * <li>{@code MGC}: appends {@code ".mgc"} and writes {@code par.getT() * par.getOrder()} floats, frame by frame.</li>
 * <li>Any other type: nothing is written; the log message is still emitted.</li>
 * </ul>
 * I/O errors are logged and not propagated. The output stream is closed even when a write fails.
 *
 * @param fileName output file name without extension
 * @param par parameter stream to save
 * @param type feature type selecting the output format
 */
public void saveParam(String fileName, HTSPStream par, HMMData.FeatureType type) {
    try {
        if (type == HMMData.FeatureType.LF0) {
            fileName += ".f0";
            DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(fileName));
            try {
                int i = 0; // index into par: counts voiced frames only
                for (int t = 0; t < voiced.length; t++) {
                    /* here par.getT are just the voiced!!! */
                    if (voiced[t]) {
                        dataOut.writeFloat((float) Math.exp(par.getPar(i, 0)));
                        i++;
                    } else {
                        dataOut.writeFloat((float) 0.0);
                    }
                }
            } finally {
                // close on every path so a failed write does not leak the file handle
                dataOut.close();
            }
        } else if (type == HMMData.FeatureType.MGC) {
            fileName += ".mgc";
            DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(fileName));
            try {
                for (int t = 0; t < par.getT(); t++) {
                    for (int m = 0; m < par.getOrder(); m++) {
                        dataOut.writeFloat((float) par.getPar(t, m));
                    }
                }
            } finally {
                dataOut.close();
            }
        }
        logger.info("saveParam in file: " + fileName);
    } catch (IOException e) {
        logger.info("IO exception = " + e);
    }
}
private void setRealisedF0(HTSPStream lf0Pst, HTSUttModel um, int numStates) { int t = 0; int vt = 0; for (int i = 0; i < um.getNumUttModel(); i++) { HTSModel m = um.getUttModel(i); int numVoicedInModel = m.getNumVoiced(); String formattedF0 = ""; int k = 1; for (int state = 0; state < numStates; state++) { for (int frame = 0; frame < m.getDur(state); frame++) { if (voiced[t++]) { float f0 = (float) Math.exp(lf0Pst.getPar(vt++, 0)); formattedF0 += "(" + Integer.toString((int) ((k * 100.0) / numVoicedInModel)) + "," + Integer.toString((int) f0) + ")"; k++; } } // for unvoiced frame } // for state if (!formattedF0.contentEquals("")) { m.setMaryXmlF0(formattedF0); // m.setUnit_f0ArrayStr(formattedF0); // System.out.println("ph=" + m.getPhoneName() + " " + formattedF0); } } // for model in utterance model list }
/* Save generated parameters in a binary file */ public void saveParamMaryFormat(String fileName, HTSPStream par, HMMData.FeatureType type) { int t, m, i; double ws = 0.025; /* window size in seconds */ double ss = 0.005; /* skip size in seconds */ int fs = 16000; /* sampling rate */ try { if (type == HMMData.FeatureType.LF0) { fileName += ".ptc"; /* * DataOutputStream data_out = new DataOutputStream (new FileOutputStream (fileName)); * data_out.writeFloat((float)(ws*fs)); data_out.writeFloat((float)(ss*fs)); data_out.writeFloat((float)fs); * data_out.writeFloat(voiced.length); * * i=0; for(t=0; t<voiced.length; t++){ // here par.getT are just the voiced!!! so the actual length of frames can * be taken from the voiced array if( voiced[t] ){ data_out.writeFloat((float)Math.exp(par.getPar(i,0))); i++; * }System.out.println("GEN f0s[" + t + "]=" + Math.exp(lf0Pst.getPar(i,0))); else * data_out.writeFloat((float)0.0); } data_out.close(); */ i = 0; double f0s[] = new double[voiced.length]; // System.out.println("voiced.length=" + voiced.length); for (t = 0; t < voiced.length; t++) { // here par.getT are just the voiced!!! so the actual length of frames can // be taken from the voiced array if (voiced[t]) { f0s[t] = Math.exp(par.getPar(i, 0)); i++; } else f0s[t] = 0.0; System.out.println("GEN f0s[" + t + "]=" + f0s[t]); } /* * i am using this function but it changes the values of sw, and ss *samplingrate+0.5??? 
for the HTS values * ss=0.005 and sw=0.025 is not a problem though */ PitchReaderWriter.write_pitch_file(fileName, f0s, (float) (ws), (float) (ss), fs); } else if (type == HMMData.FeatureType.MGC) { int numfrm = par.getT(); int dimension = par.getOrder(); Mfccs mgc = new Mfccs(numfrm, dimension); fileName += ".mfc"; for (t = 0; t < par.getT(); t++) for (m = 0; m < par.getOrder(); m++) mgc.mfccs[t][m] = par.getPar(t, m); mgc.params.samplingRate = fs; /* samplingRateInHz */ mgc.params.skipsize = (float) ss; /* skipSizeInSeconds */ mgc.params.winsize = (float) ws; /* windowSizeInSeconds */ mgc.writeMfccFile(fileName); /* * The whole set for header is in the following order: ler.writeInt(numfrm); ler.writeInt(dimension); * ler.writeFloat(winsize); ler.writeFloat(skipsize); ler.writeInt(samplingRate); */ } logger.info("saveParam in file: " + fileName); } catch (IOException e) { logger.info("IO exception = " + e); } }
/** * HTS maximum likelihood parameter generation * * @param um : utterance model sequence after processing Mary context features * @param htsData : HMM pdfs model set. * @throws Exception Exception */ public void htsMaximumLikelihoodParameterGeneration(HTSUttModel um, final HMMData htsData) throws Exception { CartTreeSet ms = htsData.getCartTreeSet(); /* Initialisation of PStream objects */ /* Initialise Parameter generation using UttModel um and Modelset ms */ /* initialise PStream objects for all the parameters that are going to be generated: */ /* mceppst, strpst, magpst, lf0pst */ /* Here i should pass the window files to initialise the dynamic windows dw */ /* for the moment the dw are all the same and hard-coded */ if (htsData.getPdfMgcStream() != null) mcepPst = new HTSPStream( ms.getMcepVsize(), um.getTotalFrame(), HMMData.FeatureType.MGC, htsData.getMaxMgcGvIter()); /* for lf0 count just the number of lf0frames that are voiced or non-zero */ if (htsData.getPdfLf0Stream() != null) lf0Pst = new HTSPStream( ms.getLf0Stream(), um.getLf0Frame(), HMMData.FeatureType.LF0, htsData.getMaxLf0GvIter()); /* The following are optional in case of generating mixed excitation */ if (htsData.getPdfStrStream() != null) strPst = new HTSPStream( ms.getStrVsize(), um.getTotalFrame(), HMMData.FeatureType.STR, htsData.getMaxStrGvIter()); if (htsData.getPdfMagStream() != null) magPst = new HTSPStream( ms.getMagVsize(), um.getTotalFrame(), HMMData.FeatureType.MAG, htsData.getMaxMagGvIter()); int lf0Frame = 0; // counts voiced frames int uttFrame = 0; // counts all frames voiced = new boolean[um.getTotalFrame()]; // local variables for faster access int msNumStates = ms.getNumStates(); int totalFrames = um.getTotalFrame(); for (int i = 0; i < um.getNumUttModel(); i++) { HTSModel m = um.getUttModel(i); int numVoicedInModel = 0; for (int state = 0; state < msNumStates; state++) { int dur = m.getDur(state); Arrays.fill(voiced, uttFrame, uttFrame += dur, m.getVoiced(state)); if 
(m.getVoiced(state)) lf0Frame += dur; } } /* mcepframe and lf0frame are used in the original code to initialise the T field */ /* in each pst, but here the pst are already initialised .... */ logger.debug("utteranceFrame=" + uttFrame + " lf0frame=" + lf0Frame); // Step 1: initialize fields in the parameter streams uttFrame = 0; lf0Frame = 0; /* copy pdfs */ for (int i = 0; i < um.getNumUttModel(); i++) { HTSModel m = um.getUttModel(i); boolean gvSwitch = m.getGvSwitch(); for (int state = 0; state < msNumStates; state++) { for (int frame = 0; frame < m.getDur(state); frame++) { /* copy pdfs for mcep */ if (mcepPst != null) { mcepPst.setMseq(uttFrame, m.getMean(FeatureType.MGC, state)); mcepPst.setVseq(uttFrame, m.getVariance(FeatureType.MGC, state)); if (!gvSwitch) mcepPst.setGvSwitch(uttFrame, false); } /* copy pdf for str */ if (strPst != null) { strPst.setMseq(uttFrame, m.getMean(FeatureType.STR, state)); strPst.setVseq(uttFrame, m.getVariance(FeatureType.STR, state)); if (!gvSwitch) strPst.setGvSwitch(uttFrame, false); } /* copy pdf for mag */ if (magPst != null) { magPst.setMseq(uttFrame, m.getMean(FeatureType.MAG, state)); magPst.setVseq(uttFrame, m.getVariance(FeatureType.MAG, state)); if (!gvSwitch) magPst.setGvSwitch(uttFrame, false); } /* copy pdfs for lf0 */ if (lf0Pst != null && !htsData.getUseAcousticModels()) { for (int k = 0; k < ms.getLf0Stream(); k++) { boolean nobound = true; /* check if current frame is voiced/unvoiced boundary or not */ for (int n = lf0Pst.getDWLeftBoundary(k); n <= lf0Pst.getDWRightBoundary(k); n++) if ((uttFrame + n) <= 0 || totalFrames <= (uttFrame + n)) nobound = false; else nobound = (nobound && voiced[uttFrame + n]); /* copy pdfs */ if (voiced[uttFrame]) { lf0Pst.setMseq(lf0Frame, k, m.getLf0Mean(state, k)); if (nobound || k == 0) lf0Pst.setIvseq(lf0Frame, k, finv(m.getLf0Variance(state, k))); else /* the variances for dynamic features are set to inf on v/uv boundary */ lf0Pst.setIvseq(lf0Frame, k, 0.0); } } } if 
(voiced[uttFrame]) { if (!gvSwitch) lf0Pst.setGvSwitch(lf0Frame, false); lf0Frame++; } uttFrame++; } /* for each frame in this state */ } /* for each state in this model */ } /* for each model in this utterance */ GVModelSet gvms = htsData.getGVModelSet(); // Step 2: set dynamic features to infinity on the borders for MGC/STR/MAG if (mcepPst != null) mcepPst.fixDynFeatOnBoundaries(); if (strPst != null) strPst.fixDynFeatOnBoundaries(); if (magPst != null) magPst.fixDynFeatOnBoundaries(); // Step 3: optimize individual parameter streams /* parameter generation for mcep */ if (mcepPst != null) { logger.info("Parameter generation for MGC: "); if (htsData.getUseGV() && (htsData.getPdfMgcGVStream() != null)) mcepPst.setGvMeanVar(gvms.getGVmeanMgc(), gvms.getGVcovInvMgc()); mcepPst.mlpg(htsData, htsData.getUseGV()); } // parameter generation for lf0 */ if (htsData.getUseAcousticModels()) loadMaryXmlF0(um, htsData); else if (lf0Pst != null) { logger.info("Parameter generation for LF0: "); if (htsData.getUseGV() && (htsData.getPdfLf0GVStream() != null)) lf0Pst.setGvMeanVar(gvms.getGVmeanLf0(), gvms.getGVcovInvLf0()); lf0Pst.mlpg(htsData, htsData.getUseGV()); // here we need set realisedF0 setRealisedF0(lf0Pst, um, msNumStates); } /* parameter generation for str */ boolean useGV = false; if (strPst != null) { logger.debug("Parameter generation for STR "); if (htsData.getUseGV() && (htsData.getPdfStrGVStream() != null)) { useGV = true; strPst.setGvMeanVar(gvms.getGVmeanStr(), gvms.getGVcovInvStr()); } strPst.mlpg(htsData, useGV); } /* parameter generation for mag */ useGV = false; if (magPst != null) { logger.info("Parameter generation for MAG "); if (htsData.getUseGV() && (htsData.getPdfMagGVStream() != null)) { useGV = true; magPst.setGvMeanVar(gvms.getGVmeanMag(), gvms.getGVcovInvMag()); } magPst.mlpg(htsData, useGV); } } /* method htsMaximumLikelihoodParameterGeneration */