/**
 * For the voice with the given name, return the list of vocalizations supported by this voice,
 * one vocalization per line. These values can be used in the "name" attribute of the
 * vocalization tag.
 *
 * @param voiceName the name of the voice to query
 * @return the list of vocalizations, or the empty string if the voice does not support
 *     vocalizations.
 */
public static String getVocalizations(String voiceName) {
  Voice v = Voice.getVoice(voiceName);
  if (v == null || !v.hasVocalizationSupport()) {
    return "";
  }
  VocalizationSynthesizer vs = v.getVocalizationSynthesizer();
  assert vs != null;
  String[] vocalizations = vs.listAvailableVocalizations();
  assert vocalizations != null;
  return StringUtils.toString(vocalizations);
}
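// Usage sketch: list the vocalization names supported by a voice and print one vocalization
// tag per name. The voice name "cmu-slt-hsmm" is illustrative; any installed voice with
// vocalization support works, and an unsupported voice simply yields no output.
public static void printVocalizationTags() {
  String vocalizations = getVocalizations("cmu-slt-hsmm");
  for (String name : vocalizations.split("\n")) {
    if (!name.isEmpty()) {
      System.out.println("<vocalization name=\"" + name + "\"/>");
    }
  }
}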
/**
 * Set the content data of this MaryData object from the given String. For XML data ({@link
 * MaryDataType#isXMLType()}), parse the String representation of the data into a DOM tree.
 *
 * @param dataString string representation of the input data.
 * @throws ParserConfigurationException if no XML parser can be configured
 * @throws SAXException if the XML data cannot be parsed
 * @throws IOException if the data cannot be read
 * @throws IllegalArgumentException if this method is called for MaryDataTypes that are neither
 *     text nor XML.
 */
public void setData(String dataString)
    throws ParserConfigurationException, SAXException, IOException {
  // First, some data cleanup:
  dataString = StringUtils.purgeNonBreakingSpaces(dataString);
  // Now, deal with it.
  if (type.isXMLType()) {
    logger.debug(
        "Parsing XML input (" + (doValidate ? "" : "non-") + "validating): " + dataString);
    xmlDocument = DomUtils.parseDocument(dataString, doValidate);
  } else if (type.isTextType()) {
    logger.debug("Setting text input: " + dataString);
    plainText = dataString;
  } else {
    throw new IllegalArgumentException("Cannot set data of type " + type + " from a string");
  }
}
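// Usage sketch: plain-text types store the string directly, XML types are parsed into a DOM
// tree, and any other type is rejected with IllegalArgumentException. This assumes the
// MaryData(MaryDataType, Locale) constructor and the TEXT/RAWMARYXML data types as found in
// MaryTTS; the input strings are illustrative.
public static void setDataExample()
    throws ParserConfigurationException, SAXException, IOException {
  MaryData textData = new MaryData(MaryDataType.TEXT, Locale.US);
  textData.setData("Hello world"); // stored as plain text after purging non-breaking spaces

  MaryData xmlData = new MaryData(MaryDataType.RAWMARYXML, Locale.US);
  xmlData.setData("<maryxml version=\"0.5\"></maryxml>"); // parsed into a DOM document
}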
public static Vector<String> getDefaultVoiceExampleTexts() {
  String defaultVoiceName = getDefaultVoiceName();
  Vector<String> defaultVoiceExampleTexts =
      StringUtils.processVoiceExampleText(getVoiceExampleText(defaultVoiceName));
  if (defaultVoiceExampleTexts == null) { // Try for general domain
    String str = getExampleText("TEXT", Voice.getVoice(defaultVoiceName).getLocale());
    if (str != null && str.length() > 0) {
      defaultVoiceExampleTexts = new Vector<String>();
      defaultVoiceExampleTexts.add(str);
    }
  }
  return defaultVoiceExampleTexts;
}
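// Usage sketch: getDefaultVoiceExampleTexts() may return null when neither voice-specific nor
// general-domain example texts are available, so a small wrapper can spare callers the null
// check (the wrapper name is illustrative, not part of the original API):
public static Vector<String> getDefaultVoiceExampleTextsOrEmpty() {
  Vector<String> examples = getDefaultVoiceExampleTexts();
  return (examples != null) ? examples : new Vector<String>();
}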
/**
 * Pseudo-harmonic based noise generation for pseudo periods: the noise part is synthesized as a
 * sum of cosine tracks at multiples of analysisParams.noiseF0InHz above the maximum frequency of
 * voicing, with random initial phases and linearly interpolated amplitudes.
 *
 * @param hnmSignal the analyzed HNM signal whose noise part is to be synthesized
 * @param analysisParams parameters used during HNM analysis
 * @param synthesisParams parameters controlling synthesis
 * @param referenceFile an existing wav file whose audio format is reused when
 *     synthesisParams.writeSeparateHarmonicTracksToOutputs is set; may be null
 * @return the synthesized noise part
 */
public static double[] synthesize(
    HntmSpeechSignal hnmSignal,
    HntmAnalyzerParams analysisParams,
    HntmSynthesizerParams synthesisParams,
    String referenceFile) {
  int i, k, n;
  double t; // Time in seconds
  double tsik = 0.0; // Synthesis time in seconds
  double tsikPlusOne = 0.0; // Synthesis time in seconds
  double trackStartInSeconds, trackEndInSeconds;
  int trackStartIndex, trackEndIndex;
  double akt;
  int numHarmonicsCurrentFrame, numHarmonicsPrevFrame, numHarmonicsNextFrame;
  int harmonicIndexShiftPrev, harmonicIndexShiftCurrent, harmonicIndexShiftNext;

  // Maximum number of pseudo-harmonic tracks over all noised frames
  int maxNumHarmonics = 0;
  for (i = 0; i < hnmSignal.frames.length; i++) {
    if (hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f
        && hnmSignal.frames[i].n != null) {
      numHarmonicsCurrentFrame =
          (int) Math.floor(hnmSignal.samplingRateInHz / analysisParams.noiseF0InHz + 0.5);
      numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
      if (numHarmonicsCurrentFrame > maxNumHarmonics) {
        maxNumHarmonics = numHarmonicsCurrentFrame;
      }
    }
  }

  double aksi;
  double aksiPlusOne;
  float[] phasekis = null;
  float phasekiPlusOne;
  float phasekt;
  boolean isPrevNoised, isNoised, isNextNoised;
  boolean isTrackNoised, isNextTrackNoised, isPrevTrackNoised;

  // In fact, this should be the prosody-scaled length once prosody modifications are implemented
  int outputLen =
      SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);
  double[] noisePart = new double[outputLen];
  Arrays.fill(noisePart, 0.0);

  // One output buffer per pseudo-harmonic track, plus a random initial phase per track
  double[][] noiseTracks = null;
  if (maxNumHarmonics > 0) {
    noiseTracks = new double[maxNumHarmonics][];
    for (k = 0; k < maxNumHarmonics; k++) {
      noiseTracks[k] = new double[outputLen];
      Arrays.fill(noiseTracks[k], 0.0);
    }

    phasekis = new float[maxNumHarmonics];
    for (k = 0; k < maxNumHarmonics; k++) {
      phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
    }
  }

  int transitionLen =
      SignalProcUtils.time2sample(
          synthesisParams.unvoicedVoicedTrackTransitionInSeconds, hnmSignal.samplingRateInHz);
  Window transitionWin = Window.get(Window.HAMMING, transitionLen * 2);
  transitionWin.normalizePeakValue(1.0f);
  double[] halfTransitionWinLeft = transitionWin.getCoeffsLeftHalf();

  // Noise components exist only below the Nyquist frequency
  float halfFs = 0.5f * hnmSignal.samplingRateInHz;

  for (i = 0; i < hnmSignal.frames.length; i++) {
    isPrevNoised = false;
    isNoised = false;
    isNextNoised = false;

    if (i > 0
        && hnmSignal.frames[i - 1].n != null
        && hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < halfFs
        && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i - 1].n).ceps != null) {
      isPrevNoised = true;
    }

    if (hnmSignal.frames[i].n != null
        && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < halfFs
        && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps != null) {
      isNoised = true;
    }

    if (i < hnmSignal.frames.length - 1
        && hnmSignal.frames[i + 1].n != null
        && hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < halfFs
        && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps != null) {
      isNextNoised = true;
    }

    numHarmonicsPrevFrame = 0;
    numHarmonicsCurrentFrame = 0;
    numHarmonicsNextFrame = 0;
    harmonicIndexShiftPrev = 0;
    harmonicIndexShiftCurrent = 0;
    harmonicIndexShiftNext = 0;

    if (isPrevNoised) {
      numHarmonicsPrevFrame =
          (int) Math.floor(
              (hnmSignal.samplingRateInHz - hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz)
                      / analysisParams.noiseF0InHz
                  + 0.5);
      numHarmonicsPrevFrame = Math.max(0, numHarmonicsPrevFrame);
      harmonicIndexShiftPrev =
          (int) Math.floor(
              hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                  + 0.5);
      harmonicIndexShiftPrev = Math.max(1, harmonicIndexShiftPrev);
    }

    if (isNoised) {
      numHarmonicsCurrentFrame =
          (int) Math.floor(
              (hnmSignal.samplingRateInHz - hnmSignal.frames[i].maximumFrequencyOfVoicingInHz)
                      / analysisParams.noiseF0InHz
                  + 0.5);
      numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
      harmonicIndexShiftCurrent =
          (int) Math.floor(
              hnmSignal.frames[i].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                  + 0.5);
      harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
    } else if (isNextNoised) {
      // Current frame is not noised but the next one is: borrow the next frame's parameters
      numHarmonicsCurrentFrame =
          (int) Math.floor(
              (hnmSignal.samplingRateInHz - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                      / analysisParams.noiseF0InHz
                  + 0.5);
      numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
      harmonicIndexShiftCurrent =
          (int) Math.floor(
              hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                  + 0.5);
      harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
    }

    if (isNextNoised) {
      numHarmonicsNextFrame =
          (int) Math.floor(
              (hnmSignal.samplingRateInHz - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                      / analysisParams.noiseF0InHz
                  + 0.5);
      numHarmonicsNextFrame = Math.max(0, numHarmonicsNextFrame);
      harmonicIndexShiftNext =
          (int) Math.floor(
              hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                  + 0.5);
      harmonicIndexShiftNext = Math.max(1, harmonicIndexShiftNext);
    }

    for (k = 0; k < numHarmonicsCurrentFrame; k++) {
      aksi = 0.0;
      aksiPlusOne = 0.0;
      phasekiPlusOne = 0.0f;

      isPrevTrackNoised = i > 0 && hnmSignal.frames[i - 1].n != null && numHarmonicsPrevFrame > k;
      isTrackNoised = hnmSignal.frames[i].n != null && numHarmonicsCurrentFrame > k;
      isNextTrackNoised =
          i < hnmSignal.frames.length - 1
              && hnmSignal.frames[i + 1].n != null
              && numHarmonicsNextFrame > k;

      tsik = hnmSignal.frames[i].tAnalysisInSeconds;
      trackStartInSeconds = (i == 0) ? 0.0 : tsik;
      tsikPlusOne =
          (i == hnmSignal.frames.length - 1)
              ? hnmSignal.originalDurationInSeconds
              : hnmSignal.frames[i + 1].tAnalysisInSeconds;
      trackEndInSeconds = tsikPlusOne;

      trackStartIndex =
          SignalProcUtils.time2sample(trackStartInSeconds, hnmSignal.samplingRateInHz);
      trackEndIndex = SignalProcUtils.time2sample(trackEndInSeconds, hnmSignal.samplingRateInHz);

      if (isTrackNoised && trackEndIndex - trackStartIndex + 1 > 0) {
        // Amplitude at the current frame boundary
        if (!analysisParams.useNoiseAmplitudesDirectly) {
          if (analysisParams.regularizedCepstrumWarpingMethod
              == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING) {
            aksi =
                RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                    (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                    hnmSignal.samplingRateInHz);
          } else if (analysisParams.regularizedCepstrumWarpingMethod
              == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING) {
            aksi =
                RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                    (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                    hnmSignal.samplingRateInHz);
          }
        } else {
          // Use amplitudes directly without the cepstrum method
          if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps.length) {
            aksi = ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps[k];
          } else {
            aksi = 0.0;
          }
        }

        // Amplitude at the next frame boundary
        if (isNextTrackNoised) {
          if (!analysisParams.useNoiseAmplitudesDirectly) {
            if (analysisParams.regularizedCepstrumWarpingMethod
                == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING) {
              aksiPlusOne =
                  RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                      ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                      (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                      hnmSignal.samplingRateInHz);
            } else if (analysisParams.regularizedCepstrumWarpingMethod
                == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING) {
              aksiPlusOne =
                  RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                      ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                      (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                      hnmSignal.samplingRateInHz);
            }
          } else {
            // Use amplitudes directly without the cepstrum method
            if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps.length) {
              aksiPlusOne = ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps[k];
            } else {
              aksiPlusOne = 0.0;
            }
          }
        } else {
          aksiPlusOne = 0.0;
        }

        // Phases: random initial phase, then linear-in-time phase advance, Equation (3.55)
        phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
        phasekiPlusOne =
            (float)
                (phasekis[k]
                    + (k + harmonicIndexShiftCurrent)
                        * MathUtils.TWOPI
                        * analysisParams.noiseF0InHz
                        * (tsikPlusOne - tsik));

        // Fade the track in if it was not present in the previous frame
        if (!isPrevTrackNoised) {
          trackStartIndex = Math.max(0, trackStartIndex - transitionLen);
        }

        for (n = trackStartIndex; n <= Math.min(trackEndIndex, outputLen - 1); n++) {
          t = SignalProcUtils.sample2time(n, hnmSignal.samplingRateInHz);

          // Amplitude estimate
          akt = MathUtils.interpolatedSample(tsik, t, tsikPlusOne, aksi, aksiPlusOne);

          // Phase estimate
          phasekt = (float) (phasekiPlusOne * (t - tsik) / (tsikPlusOne - tsik));

          if (!isPrevTrackNoised && n - trackStartIndex < transitionLen) {
            noiseTracks[k][n] =
                halfTransitionWinLeft[n - trackStartIndex] * akt * Math.cos(phasekt);
          } else {
            noiseTracks[k][n] = akt * Math.cos(phasekt);
          }
        }

        phasekis[k] = phasekiPlusOne;
      }
    }
  }

  if (noiseTracks != null) {
    // Sum the individual tracks into the noise part
    for (k = 0; k < noiseTracks.length; k++) {
      for (n = 0; n < noisePart.length; n++) {
        noisePart[n] += noiseTracks[k][n];
      }
    }

    // Optionally write the separate tracks to output files next to the reference file
    if (referenceFile != null
        && FileUtils.exists(referenceFile)
        && synthesisParams.writeSeparateHarmonicTracksToOutputs) {
      AudioInputStream inputAudio = null;
      try {
        inputAudio = AudioSystem.getAudioInputStream(new File(referenceFile));
      } catch (UnsupportedAudioFileException e) {
        e.printStackTrace(); // Cannot reuse the reference format; skip writing the tracks
      } catch (IOException e) {
        e.printStackTrace(); // Cannot read the reference file; skip writing the tracks
      }

      if (inputAudio != null) {
        for (k = 0; k < noiseTracks.length; k++) {
          noiseTracks[k] = MathUtils.divide(noiseTracks[k], 32767.0);
          DDSAudioInputStream outputAudio =
              new DDSAudioInputStream(
                  new BufferedDoubleDataSource(noiseTracks[k]), inputAudio.getFormat());
          String outFileName =
              StringUtils.getFolderName(referenceFile)
                  + "noiseTrack"
                  + String.valueOf(k + 1)
                  + ".wav";
          try {
            AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
          } catch (IOException e) {
            e.printStackTrace(); // Could not write this track; continue with the next one
          }
        }
      }
    }
  }

  return noisePart;
}
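// A minimal, self-contained sketch of the per-track rule applied above; addNoiseTrack and all
// parameter names are illustrative, not part of the original API. The amplitude is linearly
// interpolated between the frame boundaries, and the phase ramps linearly in time from 0 at
// tsik to the Equation (3.55) target phaseNext = phasePrev + 2*pi*fkHz*(tsikPlusOne - tsik),
// where fkHz is the track frequency in Hz.
static double addNoiseTrack(
    double[] out,
    int startIdx,
    int endIdx,
    double samplingRateInHz,
    double tsik,
    double tsikPlusOne,
    double aksi,
    double aksiPlusOne,
    double phasePrev,
    double fkHz) {
  double phaseNext = phasePrev + MathUtils.TWOPI * fkHz * (tsikPlusOne - tsik);
  for (int n = startIdx; n <= Math.min(endIdx, out.length - 1); n++) {
    double t = n / samplingRateInHz; // sample index to seconds
    double akt = aksi + (aksiPlusOne - aksi) * (t - tsik) / (tsikPlusOne - tsik);
    double phasekt = phaseNext * (t - tsik) / (tsikPlusOne - tsik);
    out[n] += akt * Math.cos(phasekt);
  }
  return phaseNext; // becomes phasePrev for the next synthesis frame
}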