// Plots the values in xIn between startInd and endInd.
// If bAutoClose is true, the figure is closed after milliSecondsToClose milliseconds;
// milliSecondsToClose has no effect if bAutoClose is false.
public static void plot(
    double[] xIn,
    int startInd,
    int endInd,
    String strTitle,
    boolean bAutoClose,
    int milliSecondsToClose) {
  if (xIn != null) {
    endInd = MathUtils.CheckLimits(endInd, 0, xIn.length - 1);
    startInd = MathUtils.CheckLimits(startInd, 0, endInd);

    double[] x = new double[endInd - startInd + 1];
    System.arraycopy(xIn, startInd, x, 0, x.length);

    FunctionGraph graph = new FunctionGraph(400, 200, 0, 1, x);
    JFrame frame = graph.showInJFrame(strTitle, 500, 300, true, false);

    if (bAutoClose) {
      try {
        Thread.sleep(milliSecondsToClose);
      } catch (InterruptedException e) {
        // Ignore interruption; the frame is disposed below either way.
      }
      frame.dispose();
    }
  }
}
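For illustration, a call sketch with hypothetical values: plot the first second of a 16 kHz signal and let the figure close itself after two seconds.

// Illustrative call (values are hypothetical): plot samples 0..15999 of x,
// auto-closing the window after 2000 ms.
plot(x, 0, 15999, "waveform excerpt", true, 2000);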
/**
 * Here the actual processing of the frequency-domain frame (in cartesian coordinates) happens.
 * This implementation converts to polar coordinates, calls processPolar(), and converts the
 * result back to cartesian coordinates.
 *
 * @param real real part of the spectrum; on return, holds the processed real part
 * @param imag imaginary part of the spectrum; on return, holds the processed imaginary part
 */
protected final void process(double[] real, double[] imag) {
  MathUtils.toPolarCoordinates(real, imag);
  // For readability: after the conversion, real holds magnitudes and imag holds phases.
  double[] r = real;
  double[] phi = imag;
  // Now do something meaningful with the fourier transform
  processPolar(r, phi);
  // Convert back:
  MathUtils.toCartesianCoordinates(real, imag);
}
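A minimal subclass sketch of how the processPolar() hook is meant to be used: keeping the magnitudes but randomizing the phases turns each frame into noise with the same spectral envelope (a simple "whisper" effect). The enclosing class name and its constructor signature are assumptions here; only the processPolar(double[], double[]) contract comes from the code above.

// Hedged sketch: PolarFrequencyProcessor stands in for the (unnamed) class that
// declares the processPolar() hook called by process() above.
public class PhaseRandomizer extends PolarFrequencyProcessor {
  public PhaseRandomizer(int fftSize) {
    super(fftSize); // assumed constructor
  }

  @Override
  protected void processPolar(double[] r, double[] phi) {
    // Keep the magnitude spectrum r; replace each phase with a random value in [-pi, pi).
    for (int i = 0; i < phi.length; i++) {
      phi[i] = MathUtils.TWOPI * (Math.random() - 0.5);
    }
  }
}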
public void testStretch2() {
  double[] signal = FFTTest.getSampleSignal(16000);
  int samplingRate = 8000;
  double rateFactor = 0.5;
  NaiveVocoder nv =
      new NaiveVocoder(new BufferedDoubleDataSource(signal), samplingRate, rateFactor);
  double[] result = nv.getAllData();
  double meanSignalEnergy = MathUtils.mean(MathUtils.multiply(signal, signal));
  double meanResultEnergy = MathUtils.mean(MathUtils.multiply(result, result));
  double percentDifference =
      Math.abs(meanSignalEnergy - meanResultEnergy) / meanSignalEnergy * 100;
  assertTrue(
      "Stretching changed signal energy by " + percentDifference + "%", percentDifference < 6);
}
@Test
public void testIdentity() {
  double[] signal = FFTTest.getSampleSignal(16000);
  int samplingRate = 8000;
  FrameOverlapAddSource ola =
      new FrameOverlapAddSource(new BufferedDoubleDataSource(signal), 2048, samplingRate, null);
  double[] result = ola.getAllData();
  double err = MathUtils.sumSquaredError(signal, result);
  assertTrue("Error: " + err, err < 1.E-19);
}
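The near-zero error bound relies on the constant-overlap-add (COLA) property: assuming the frame source applies an analysis window whose shifted copies sum to one, re-overlapping unprocessed frames reconstructs the input up to rounding error. For a (periodic) Hann window of length $N$ at hop $H = N/2$, this holds exactly:

\[ \sum_m w(n - mH) = 1, \qquad w(n) = \tfrac{1}{2}\left(1 - \cos\frac{2\pi n}{N}\right). \]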
public static void main(String[] args) {
  int samplingRate = Integer.getInteger("samplingrate", 16000).intValue();
  int windowLengthMs = Integer.getInteger("windowlength.ms", 0).intValue();
  int windowLength = Integer.getInteger("windowlength.samples", 512).intValue();
  // If both are given, the window length in milliseconds takes precedence:
  if (windowLengthMs != 0) windowLength = windowLengthMs * samplingRate / 1000;
  int fftSize = Math.max(4096, MathUtils.closestPowerOfTwoAbove(windowLength));
  Window w = new BlackmanWindow(windowLength);

  FunctionGraph timeGraph = new FunctionGraph(0, 1. / samplingRate, w.window);
  timeGraph.showInJFrame(w.toString() + " in time domain", true, false);

  double[] fftSignal = new double[fftSize];
  // fftSignal should integrate to one, so normalise amplitudes:
  double sum = MathUtils.sum(w.window);
  for (int i = 0; i < w.window.length; i++) {
    fftSignal[i] = w.window[i] / sum;
  }
  LogSpectrum freqGraph = new LogSpectrum(fftSignal, samplingRate);
  freqGraph.showInJFrame(w.toString() + " log frequency response", true, false);
}
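The defaults can be overridden through the system properties read above, e.g. `java -Dsamplingrate=16000 -Dwindowlength.ms=25 <main class>` (invocation sketch; the fully qualified name of the main class depends on where this method lives).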
public DoubleDataSource process(DoubleDataSource inputAudio) {
  amount = MathUtils.CheckLimits(amount, MIN_AMOUNT, MAX_AMOUNT);
  double[] vscales = {amount};
  int frameLength = SignalProcUtils.getDFTSize(fs);
  int predictionOrder = SignalProcUtils.getLPOrder(fs);
  VocalTractScalingProcessor p =
      new VocalTractScalingProcessor(predictionOrder, fs, frameLength, vscales);
  FrameOverlapAddSource foas =
      new FrameOverlapAddSource(inputAudio, Window.HANNING, true, frameLength, fs, p);
  return new BufferedDoubleDataSource(foas);
}
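A minimal driver sketch for this method, reusing the audio I/O classes that appear in the analysis/resynthesis main further below. `VocalTractScaler` is a hypothetical name for the enclosing effect class (whose `amount` and `fs` fields are assumed to be set); everything else is the same API used elsewhere in this code.

// Hedged usage sketch; `effect` is an instance of the (unnamed) enclosing effect class.
public static void applyVocalTractScaling(String inFile, String outFile, VocalTractScaler effect)
    throws IOException, UnsupportedAudioFileException {
  AudioInputStream in = AudioSystem.getAudioInputStream(new File(inFile));
  DoubleDataSource scaled = effect.process(new AudioDoubleDataSource(in));
  DDSAudioInputStream out = new DDSAudioInputStream(scaled, in.getFormat());
  AudioSystem.write(out, AudioFileFormat.Type.WAVE, new File(outFile));
}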
public static void plotZoomed(double[] x, String strTitle, double minVal) {
  if (x != null) plotZoomed(x, strTitle, minVal, MathUtils.getMax(x));
}
public static void plotZoomed(float[] x, String strTitle, double minVal) {
  if (x != null) plotZoomed(x, strTitle, minVal, MathUtils.getMax(x));
}
// Synthesizes a signal from the sinusoidal tracks in st.
// The output is rescaled so that its absolute maximum equals st.absMaxOriginal.
public double[] synthesize(SinusoidalTracks st, boolean isSilentSynthesis) {
  int n; // discrete time index
  int i, j;
  int nStart, nEnd, pStart, pEnd;
  float t; // continuous time
  float t2; // continuous time squared
  float t3; // continuous time cubed

  float tFinal = st.getOriginalDuration();
  int nFinal = (int) (Math.floor(tFinal * st.fs + 0.5));
  double[] y = new double[nFinal + 1];
  Arrays.fill(y, 0.0);
  float currentAmp;
  float currentTheta;
  double alpha, beta;
  int M;
  float T; // Number of samples between consecutive frames (equal to the pitch period in
  // pitch-synchronous analysis/synthesis)
  float T2; // T squared
  float T3; // T cubed
  double oneOverTwoPi = 1.0 / MathUtils.TWOPI;
  double term1, term2;

  // Set to true to use direct synthesis instead of synthesis with interpolation:
  final boolean bDirectSynthesis = false;

  for (i = 0; i < st.totalTracks; i++) {
    for (j = 0; j < st.tracks[i].totalSins - 1; j++) {
      if (st.tracks[i].states[j] != SinusoidalTrack.TURNED_OFF) {
        pStart = (int) Math.floor(st.tracks[i].times[j] * st.fs + 0.5);
        pEnd = (int) Math.floor(st.tracks[i].times[j + 1] * st.fs + 0.5);
        nStart = Math.max(0, pStart);
        nEnd = Math.max(0, pEnd);
        nStart = Math.min(y.length - 1, nStart);
        nEnd = Math.min(y.length - 1, nEnd);

        for (n = nStart; n < nEnd; n++) {
          if (bDirectSynthesis) {
            currentAmp = st.tracks[i].amps[j];
            currentTheta = (n - nStart) * st.tracks[i].freqs[j] + st.tracks[i].phases[j];
            y[n] += currentAmp * Math.cos(currentTheta);
          } else { // Synthesis with interpolation
            // Amplitude interpolation
            currentAmp =
                st.tracks[i].amps[j]
                    + (st.tracks[i].amps[j + 1] - st.tracks[i].amps[j])
                        * ((float) n - pStart)
                        / (pEnd - pStart + 1);

            T = (pEnd - pStart);

            if (n == nStart && st.tracks[i].states[j] == SinusoidalTrack.TURNED_ON) {
              // Turning on a track (Quatieri)
              currentTheta = st.tracks[i].phases[j + 1] - T * st.tracks[i].freqs[j + 1];
              currentAmp = 0.0f;
            } else if (n == nStart
                && st.tracks[i].states[j] == SinusoidalTrack.TURNED_OFF
                && j > 0) {
              // Turning off a track (Quatieri)
              currentTheta = st.tracks[i].phases[j - 1] + T * st.tracks[i].freqs[j - 1];
              currentAmp = 0.0f;
            } else {
              // Cubic phase interpolation (Quatieri)
              M =
                  (int)
                      (Math.floor(
                          oneOverTwoPi
                                  * ((st.tracks[i].phases[j]
                                          + T * st.tracks[i].freqs[j]
                                          - st.tracks[i].phases[j + 1])
                                      + (st.tracks[i].freqs[j + 1] - st.tracks[i].freqs[j])
                                          * 0.5
                                          * T)
                              + 0.5));
              term1 =
                  st.tracks[i].phases[j + 1]
                      - st.tracks[i].phases[j]
                      - T * st.tracks[i].freqs[j]
                      + M * MathUtils.TWOPI;
              term2 = st.tracks[i].freqs[j + 1] - st.tracks[i].freqs[j];

              T2 = T * T;
              T3 = T * T2;
              alpha = 3.0 * term1 / T2 - term2 / T;
              beta = -2 * term1 / T3 + term2 / T2;

              t = ((float) n - nStart);
              t2 = t * t;
              t3 = t * t2;

              currentTheta =
                  (float)
                      (st.tracks[i].phases[j]
                          + st.tracks[i].freqs[j] * t
                          + alpha * t2
                          + beta * t3);
            }

            // Synthesis
            y[n] += currentAmp * Math.cos(currentTheta);
          }
        }
      }
    }

    if (!isSilentSynthesis)
      System.out.println("Synthesized track " + (i + 1) + " of " + st.totalTracks);
  }

  y = MathUtils.multiply(y, st.absMaxOriginal / MathUtils.getAbsMax(y));

  return y;
}
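For reference, the cubic branch above implements the maximally smooth phase interpolation of the McAulay–Quatieri sinusoidal model. With boundary phases $\theta_l, \theta_{l+1}$ and frequencies $\omega_l, \omega_{l+1}$ (radians per sample) spaced $T$ samples apart, the code's `term1`, `term2`, `alpha`, and `beta` correspond to:

\[ M = \left\lfloor \frac{1}{2\pi}\left[(\theta_l + \omega_l T - \theta_{l+1}) + \frac{T}{2}(\omega_{l+1} - \omega_l)\right] + \frac{1}{2} \right\rfloor \]
\[ \Delta = \theta_{l+1} - \theta_l - \omega_l T + 2\pi M \;(\texttt{term1}), \qquad \omega_{l+1} - \omega_l \;(\texttt{term2}) \]
\[ \alpha = \frac{3\Delta}{T^2} - \frac{\omega_{l+1} - \omega_l}{T}, \qquad \beta = -\frac{2\Delta}{T^3} + \frac{\omega_{l+1} - \omega_l}{T^2} \]
\[ \theta(t) = \theta_l + \omega_l t + \alpha t^2 + \beta t^3, \qquad t = n - n_{\mathrm{start}}. \]

The integer $M$ unwraps the phase so that the cubic matches both phase and frequency at each frame boundary with the smoothest possible trajectory.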
public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
  // File input
  AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[0]));
  int samplingRate = (int) inputAudio.getFormat().getSampleRate();
  AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
  double[] x = signal.getAllData();
  double maxOrig = MathUtils.getAbsMax(x);

  SinusoidalAnalyzer sa = null;
  SinusoidalTracks st = null;
  PitchSynchronousSinusoidalAnalyzer pa = null;

  // Analysis
  float deltaInHz = SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ;
  float numPeriods = PitchSynchronousSinusoidalAnalyzer.DEFAULT_ANALYSIS_PERIODS;

  boolean isSilentSynthesis = false;

  int windowType = Window.HANNING;

  boolean bRefinePeakEstimatesParabola = false;
  boolean bRefinePeakEstimatesBias = false;
  boolean bSpectralReassignment = false;
  boolean bAdjustNeighFreqDependent = false;

  // int spectralEnvelopeType = SinusoidalAnalysisParams.LP_SPEC;
  int spectralEnvelopeType = SinusoidalAnalysisParams.SEEVOC_SPEC;

  float[] initialPeakLocationsInHz = {350.0f};

  boolean isFixedRateAnalysis = false;
  boolean isRealSpeech = true;

  double startFreqInHz = 0.0;
  double endFreqInHz = 0.5 * samplingRate;

  SinusoidalAnalysisParams params =
      new SinusoidalAnalysisParams(
          samplingRate,
          startFreqInHz,
          endFreqInHz,
          windowType,
          bRefinePeakEstimatesParabola,
          bRefinePeakEstimatesBias,
          bSpectralReassignment,
          bAdjustNeighFreqDependent);

  if (isFixedRateAnalysis) {
    // Fixed window size and skip rate analysis
    double[] f0s = null;
    float ws_f0 = -1.0f;
    float ss_f0 = -1.0f;
    sa = new SinusoidalAnalyzer(params);

    if (spectralEnvelopeType == SinusoidalAnalysisParams.SEEVOC_SPEC) {
      // Pitch info needed
      String strPitchFile = args[0].substring(0, args[0].length() - 4) + ".ptc";
      PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
      f0s = f0.contour;
      ws_f0 = (float) f0.header.windowSizeInSeconds;
      ss_f0 = (float) f0.header.skipSizeInSeconds;
    }

    st = sa.analyzeFixedRate(x, 0.020f, 0.010f, deltaInHz, spectralEnvelopeType, f0s, ws_f0, ss_f0);
  } else {
    // Pitch synchronous analysis
    String strPitchFile = args[0].substring(0, args[0].length() - 4) + ".ptc";
    PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
    int pitchMarkOffset = 0;
    PitchMarks pm =
        SignalProcUtils.pitchContour2pitchMarks(
            f0.contour,
            samplingRate,
            x.length,
            f0.header.windowSizeInSeconds,
            f0.header.skipSizeInSeconds,
            true,
            pitchMarkOffset);
    pa = new PitchSynchronousSinusoidalAnalyzer(params);

    st =
        pa.analyzePitchSynchronous(
            x, pm, numPeriods, -1.0f, deltaInHz, spectralEnvelopeType, initialPeakLocationsInHz);
  }

  // Resynthesis
  PeakMatchedSinusoidalSynthesizer ss = new PeakMatchedSinusoidalSynthesizer(samplingRate);
  x = ss.synthesize(st, isSilentSynthesis);

  // File output
  DDSAudioInputStream outputAudio =
      new DDSAudioInputStream(new BufferedDoubleDataSource(x), inputAudio.getFormat());
  String outFileName =
      args[0].substring(0, args[0].length() - 4) + "_sinResynthFullbandPitchSynch.wav";
  AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
}
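Note that both analysis paths that require pitch information derive the pitch file name from the input: for an input `foo.wav`, a pitch contour file `foo.ptc` (read by PitchReaderWriter) is expected alongside it. The resynthesized output is written next to the input with the suffix `_sinResynthFullbandPitchSynch.wav`.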
// Pseudo harmonics based noise generation for pseudo periods
public static double[] synthesize(
    HntmSpeechSignal hnmSignal,
    HntmAnalyzerParams analysisParams,
    HntmSynthesizerParams synthesisParams,
    String referenceFile) {
  int i, k, n;
  double t; // Time in seconds

  double tsik = 0.0; // Synthesis time in seconds
  double tsikPlusOne = 0.0; // Synthesis time in seconds

  double trackStartInSeconds, trackEndInSeconds;
  int trackStartIndex, trackEndIndex;
  double akt;
  int numHarmonicsCurrentFrame, numHarmonicsPrevFrame, numHarmonicsNextFrame;
  int harmonicIndexShiftPrev, harmonicIndexShiftCurrent, harmonicIndexShiftNext;
  int maxNumHarmonics = 0;
  for (i = 0; i < hnmSignal.frames.length; i++) {
    if (hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f
        && hnmSignal.frames[i].n != null) {
      numHarmonicsCurrentFrame =
          (int) Math.floor(hnmSignal.samplingRateInHz / analysisParams.noiseF0InHz + 0.5);
      numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
      if (numHarmonicsCurrentFrame > maxNumHarmonics) maxNumHarmonics = numHarmonicsCurrentFrame;
    }
  }

  double aksi;
  double aksiPlusOne;

  float[] phasekis = null;
  float phasekiPlusOne;
  float phasekt = 0.0f;

  boolean isPrevNoised, isNoised, isNextNoised;
  boolean isTrackNoised, isNextTrackNoised, isPrevTrackNoised;

  int outputLen =
      SignalProcUtils.time2sample(hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);

  // In fact, this should be the prosody-scaled length once prosody modifications are implemented
  double[] noisePart = new double[outputLen];
  Arrays.fill(noisePart, 0.0);

  // Separate noise tracks; they are summed into noisePart and optionally written out below
  double[][] noiseTracks = null;
  if (maxNumHarmonics > 0) {
    noiseTracks = new double[maxNumHarmonics][];
    for (k = 0; k < maxNumHarmonics; k++) {
      noiseTracks[k] = new double[outputLen];
      Arrays.fill(noiseTracks[k], 0.0);
    }

    phasekis = new float[maxNumHarmonics];
    for (k = 0; k < maxNumHarmonics; k++)
      phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
  }

  int transitionLen =
      SignalProcUtils.time2sample(
          synthesisParams.unvoicedVoicedTrackTransitionInSeconds, hnmSignal.samplingRateInHz);
  Window transitionWin = Window.get(Window.HAMMING, transitionLen * 2);
  transitionWin.normalizePeakValue(1.0f);
  double[] halfTransitionWinLeft = transitionWin.getCoeffsLeftHalf();
  float halfFs = 0.5f * hnmSignal.samplingRateInHz; // Nyquist frequency

  for (i = 0; i < hnmSignal.frames.length; i++) {
    isPrevNoised = false;
    isNoised = false;
    isNextNoised = false;

    if (i > 0
        && hnmSignal.frames[i - 1].n != null
        && hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < halfFs
        && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i - 1].n).ceps != null)
      isPrevNoised = true;

    if (i > 0
        && hnmSignal.frames[i].n != null
        && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < halfFs
        && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps != null)
      isNoised = true;

    if (i < hnmSignal.frames.length - 1
        && hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < halfFs
        && hnmSignal.frames[i + 1].n != null
        && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps != null)
      isNextNoised = true;

    numHarmonicsPrevFrame = 0;
    numHarmonicsCurrentFrame = 0;
    numHarmonicsNextFrame = 0;
    harmonicIndexShiftPrev = 0;
    harmonicIndexShiftCurrent = 0;
    harmonicIndexShiftNext = 0;

    if (isPrevNoised) {
      numHarmonicsPrevFrame =
          (int)
              Math.floor(
                  (hnmSignal.samplingRateInHz
                              - hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz)
                          / analysisParams.noiseF0InHz
                      + 0.5);
      numHarmonicsPrevFrame = Math.max(0, numHarmonicsPrevFrame);
      harmonicIndexShiftPrev =
          (int)
              Math.floor(
                  hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz
                          / analysisParams.noiseF0InHz
                      + 0.5);
      harmonicIndexShiftPrev = Math.max(1, harmonicIndexShiftPrev);
    }

    if (isNoised) {
      numHarmonicsCurrentFrame =
          (int)
              Math.floor(
                  (hnmSignal.samplingRateInHz - hnmSignal.frames[i].maximumFrequencyOfVoicingInHz)
                          / analysisParams.noiseF0InHz
                      + 0.5);
      numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
      harmonicIndexShiftCurrent =
          (int)
              Math.floor(
                  hnmSignal.frames[i].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                      + 0.5);
      harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
    } else if (!isNoised && isNextNoised) {
      numHarmonicsCurrentFrame =
          (int)
              Math.floor(
                  (hnmSignal.samplingRateInHz
                              - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                          / analysisParams.noiseF0InHz
                      + 0.5);
      numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
      harmonicIndexShiftCurrent =
          (int)
              Math.floor(
                  hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz
                          / analysisParams.noiseF0InHz
                      + 0.5);
      harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
    }

    if (isNextNoised) {
      numHarmonicsNextFrame =
          (int)
              Math.floor(
                  (hnmSignal.samplingRateInHz
                              - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                          / analysisParams.noiseF0InHz
                      + 0.5);
      numHarmonicsNextFrame = Math.max(0, numHarmonicsNextFrame);
      harmonicIndexShiftNext =
          (int)
              Math.floor(
                  hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz
                          / analysisParams.noiseF0InHz
                      + 0.5);
      harmonicIndexShiftNext = Math.max(1, harmonicIndexShiftNext);
    }

    for (k = 0; k < numHarmonicsCurrentFrame; k++) {
      aksi = 0.0;
      aksiPlusOne = 0.0;
      phasekiPlusOne = 0.0f;

      isPrevTrackNoised = false;
      isTrackNoised = false;
      isNextTrackNoised = false;

      if (i > 0 && hnmSignal.frames[i - 1].n != null && numHarmonicsPrevFrame > k)
        isPrevTrackNoised = true;

      if (hnmSignal.frames[i].n != null && numHarmonicsCurrentFrame > k) isTrackNoised = true;

      if (i < hnmSignal.frames.length - 1
          && hnmSignal.frames[i + 1].n != null
          && numHarmonicsNextFrame > k)
        isNextTrackNoised = true;

      tsik = hnmSignal.frames[i].tAnalysisInSeconds;

      if (i == 0) trackStartInSeconds = 0.0;
      else trackStartInSeconds = tsik;

      if (i == hnmSignal.frames.length - 1) tsikPlusOne = hnmSignal.originalDurationInSeconds;
      else tsikPlusOne = hnmSignal.frames[i + 1].tAnalysisInSeconds;

      trackEndInSeconds = tsikPlusOne;

      trackStartIndex =
          SignalProcUtils.time2sample(trackStartInSeconds, hnmSignal.samplingRateInHz);
      trackEndIndex = SignalProcUtils.time2sample(trackEndInSeconds, hnmSignal.samplingRateInHz);

      if (isTrackNoised && trackEndIndex - trackStartIndex + 1 > 0) {
        // Amplitudes
        if (!analysisParams.useNoiseAmplitudesDirectly) {
          if (analysisParams.regularizedCepstrumWarpingMethod
              == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
            aksi =
                RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                    (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                    hnmSignal.samplingRateInHz);
          else if (analysisParams.regularizedCepstrumWarpingMethod
              == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
            aksi =
                RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                    (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                    hnmSignal.samplingRateInHz);
        } else {
          // Use amplitudes directly without the cepstrum method
          if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps.length)
            aksi = ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps[k];
          else aksi = 0.0;
        }

        if (isNextTrackNoised) {
          if (!analysisParams.useNoiseAmplitudesDirectly) {
            if (analysisParams.regularizedCepstrumWarpingMethod
                == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
              aksiPlusOne =
                  RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                      ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                      (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                      hnmSignal.samplingRateInHz);
            else if (analysisParams.regularizedCepstrumWarpingMethod
                == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
              aksiPlusOne =
                  RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                      ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                      (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                      hnmSignal.samplingRateInHz);
          } else {
            // Use amplitudes directly without the cepstrum method
            if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps.length)
              aksiPlusOne = ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps[k];
            else aksiPlusOne = 0.0;
          }
        } else aksiPlusOne = 0.0;

        // Phases
        phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
        phasekiPlusOne =
            (float)
                (phasekis[k]
                    + (k + harmonicIndexShiftCurrent)
                        * MathUtils.TWOPI
                        * analysisParams.noiseF0InHz
                        * (tsikPlusOne - tsik)); // Equation (3.55)

        if (!isPrevTrackNoised) trackStartIndex = Math.max(0, trackStartIndex - transitionLen);

        for (n = trackStartIndex; n <= Math.min(trackEndIndex, outputLen - 1); n++) {
          t = SignalProcUtils.sample2time(n, hnmSignal.samplingRateInHz);

          // Amplitude estimate
          akt = MathUtils.interpolatedSample(tsik, t, tsikPlusOne, aksi, aksiPlusOne);

          // Phase estimate
          phasekt = (float) (phasekiPlusOne * (t - tsik) / (tsikPlusOne - tsik));

          // Fade the track in with the left half of the transition window if it was
          // not present in the previous frame
          if (!isPrevTrackNoised && n - trackStartIndex < transitionLen)
            noiseTracks[k][n] =
                halfTransitionWinLeft[n - trackStartIndex] * akt * Math.cos(phasekt);
          else noiseTracks[k][n] = akt * Math.cos(phasekt);
        }

        phasekis[k] = phasekiPlusOne;
      }
    }
  }

  if (noiseTracks != null) {
    // Sum the separate noise tracks into the output
    for (k = 0; k < noiseTracks.length; k++) {
      for (n = 0; n < noisePart.length; n++) noisePart[n] += noiseTracks[k][n];
    }

    if (referenceFile != null
        && FileUtils.exists(referenceFile)
        && synthesisParams.writeSeparateHarmonicTracksToOutputs) {
      // Write separate tracks to output
      AudioInputStream inputAudio = null;
      try {
        inputAudio = AudioSystem.getAudioInputStream(new File(referenceFile));
      } catch (UnsupportedAudioFileException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      }

      if (inputAudio != null) {
        for (k = 0; k < noiseTracks.length; k++) {
          noiseTracks[k] = MathUtils.divide(noiseTracks[k], 32767.0);
          DDSAudioInputStream outputAudio =
              new DDSAudioInputStream(
                  new BufferedDoubleDataSource(noiseTracks[k]), inputAudio.getFormat());
          String outFileName =
              StringUtils.getFolderName(referenceFile) + "noiseTrack" + (k + 1) + ".wav";
          try {
            AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
          } catch (IOException e) {
            e.printStackTrace();
          }
        }
      }
    }
  }

  return noisePart;
}
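The phase recursion marked "Equation (3.55)" advances each pseudo-harmonic's randomized phase linearly at its own frequency across the synthesis frame:

\[ \phi_k(t_{i+1}) = \phi_k(t_i) + 2\pi\,(k + k_0)\,f_{0,\mathrm{noise}}\,(t_{i+1} - t_i), \]

where $k_0$ is `harmonicIndexShiftCurrent` (the index of the first pseudo-harmonic above the maximum frequency of voicing) and $f_{0,\mathrm{noise}}$ is `analysisParams.noiseF0InHz`.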
@Override
public boolean compute() throws IOException, MaryConfigurationException {
  logger.info("Duration tree trainer started.");
  FeatureFileReader featureFile = FeatureFileReader.getFeatureFileReader(getProp(FEATUREFILE));
  UnitFileReader unitFile = new UnitFileReader(getProp(UNITFILE));

  FeatureVector[] allFeatureVectors = featureFile.getFeatureVectors();
  int maxData = Integer.parseInt(getProp(MAXDATA));
  if (maxData == 0) maxData = allFeatureVectors.length;
  FeatureVector[] featureVectors =
      new FeatureVector[Math.min(maxData, allFeatureVectors.length)];
  System.arraycopy(allFeatureVectors, 0, featureVectors, 0, featureVectors.length);
  logger.debug(
      "Total of "
          + allFeatureVectors.length
          + " feature vectors -- will use "
          + featureVectors.length);

  AgglomerativeClusterer clusterer =
      new AgglomerativeClusterer(
          featureVectors,
          featureFile.getFeatureDefinition(),
          null,
          new DurationDistanceMeasure(unitFile),
          Float.parseFloat(getProp(PROPORTIONTESTDATA)));
  DirectedGraphWriter writer = new DirectedGraphWriter();
  DirectedGraph graph;
  int iteration = 0;
  do {
    graph = clusterer.cluster();
    iteration++;
    if (graph != null) {
      writer.saveGraph(graph, getProp(DURTREE) + ".level" + iteration);
    }
  } while (clusterer.canClusterMore());

  if (graph == null) {
    return false;
  }

  // Now replace each leaf with a FloatLeafNode containing mean and stddev
  for (LeafNode leaf : graph.getLeafNodes()) {
    FeatureVectorLeafNode fvLeaf = (FeatureVectorLeafNode) leaf;
    FeatureVector[] fvs = fvLeaf.getFeatureVectors();
    double[] dur = new double[fvs.length];
    for (int i = 0; i < fvs.length; i++) {
      dur[i] =
          unitFile.getUnit(fvs[i].getUnitIndex()).duration / (float) unitFile.getSampleRate();
    }
    double mean = MathUtils.mean(dur);
    double stddev = MathUtils.standardDeviation(dur, mean);
    // Note the order of the float data: {stddev, mean}
    FloatLeafNode floatLeaf = new FloatLeafNode(new float[] {(float) stddev, (float) mean});
    Node mother = fvLeaf.getMother();
    assert mother != null;
    if (mother.isDecisionNode()) {
      ((DecisionNode) mother).replaceDaughter(floatLeaf, fvLeaf.getNodeIndex());
    } else {
      assert mother.isDirectedGraphNode();
      assert ((DirectedGraphNode) mother).getLeafNode() == fvLeaf;
      ((DirectedGraphNode) mother).setLeafNode(floatLeaf);
    }
  }
  writer.saveGraph(graph, getProp(DURTREE));
  return true;
}