Example #1
  // Plots the values in x
  // If bAutoClose is specified, the figure is closed after milliSecondsToClose milliseconds
  // milliSecondsToClose: has no effect if bAutoClose is false
  public static void plot(
      double[] xIn,
      int startInd,
      int endInd,
      String strTitle,
      boolean bAutoClose,
      int milliSecondsToClose) {
    if (xIn != null) {
      endInd = MathUtils.CheckLimits(endInd, 0, xIn.length - 1);
      startInd = MathUtils.CheckLimits(startInd, 0, endInd);

      double[] x = new double[endInd - startInd + 1];
      System.arraycopy(xIn, startInd, x, 0, x.length);

      FunctionGraph graph = new FunctionGraph(400, 200, 0, 1, x);
      JFrame frame = graph.showInJFrame(strTitle, 500, 300, true, false);

      if (bAutoClose) {
        try {
          Thread.sleep(milliSecondsToClose);
        } catch (InterruptedException e) {
          // Restore the interrupt status rather than silently swallowing it
          Thread.currentThread().interrupt();
        }
        frame.dispose();
      }
    }
  }
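
A minimal usage sketch (hypothetical caller; the utility class name DisplayUtils is assumed for illustration):

  public static void main(String[] args) {
    double[] ramp = new double[100];
    for (int i = 0; i < ramp.length; i++) ramp[i] = i / 100.0;
    // Plot samples 10..89 and dispose the window automatically after 2000 ms
    DisplayUtils.plot(ramp, 10, 89, "Ramp", true, 2000);
  }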
Example #2
 /**
  * Here the actual processing of the frequency-domain frame (in Cartesian coordinates) happens.
  * This implementation converts to polar coordinates, calls processPolar(), and converts the
  * result back to Cartesian coordinates.
  *
  * @param real the real part of the spectrum
  * @param imag the imaginary part of the spectrum
  */
 protected final void process(double[] real, double[] imag) {
   MathUtils.toPolarCoordinates(real, imag);
   // for readability:
   double[] r = real;
   double[] phi = imag;
   // Now do something meaningful with the Fourier transform
   processPolar(r, phi);
   // Convert back:
   MathUtils.toCartesianCoordinates(real, imag);
 }
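
A hedged sketch of the subclass side: the code above implies a processPolar() hook; an override that zeroes the phases (everything here is illustrative, not the library's own subclass) would produce zero-phase frames while preserving the magnitude spectrum:

  // Hypothetical override: keep magnitudes r, discard phases phi
  protected void processPolar(double[] r, double[] phi) {
    java.util.Arrays.fill(phi, 0.0);
  }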
Example #3
 public void testStretch2() {
   double[] signal = FFTTest.getSampleSignal(16000);
   int samplingRate = 8000;
   double rateFactor = 0.5;
   NaiveVocoder nv =
       new NaiveVocoder(new BufferedDoubleDataSource(signal), samplingRate, rateFactor);
   double[] result = nv.getAllData();
   double meanSignalEnergy = MathUtils.mean(MathUtils.multiply(signal, signal));
   double meanResultEnergy = MathUtils.mean(MathUtils.multiply(result, result));
   double percentDifference =
       Math.abs(meanSignalEnergy - meanResultEnergy) / meanSignalEnergy * 100;
   assertTrue(
       "Stretching changed signal energy by  " + percentDifference + "%", percentDifference < 6);
 }
Example #4
 @Test
 public void testIdentity() {
   double[] signal = FFTTest.getSampleSignal(16000);
   int samplingRate = 8000;
   FrameOverlapAddSource ola =
       new FrameOverlapAddSource(new BufferedDoubleDataSource(signal), 2048, samplingRate, null);
   double[] result = ola.getAllData();
   double err = MathUtils.sumSquaredError(signal, result);
   assertTrue("Error: " + err, err < 1.E-19);
 }
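
Background (not part of the snippet): near-perfect reconstruction is expected when the windowing satisfies the constant-overlap-add (COLA) condition, \sum_m w(n - mR) = C for all n, where R is the hop size; for example, a Hann window of length N at hop R = N/2 gives C = 1, so overlap-adding unmodified frames reproduces the input up to floating-point error, which is what the 1.E-19 bound checks.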
Example #5
 public static void main(String[] args) {
   int samplingRate = Integer.getInteger("samplingrate", 1).intValue();
   int windowLengthMs = Integer.getInteger("windowlength.ms", 0).intValue();
   int windowLength = Integer.getInteger("windowlength.samples", 512).intValue();
   // If both are given, use window length in milliseconds:
   if (windowLengthMs != 0) windowLength = windowLengthMs * samplingRate / 1000;
   int fftSize = Math.max(4096, MathUtils.closestPowerOfTwoAbove(windowLength));
   Window w = new BlackmanWindow(windowLength);
   FunctionGraph timeGraph = new FunctionGraph(0, 1. / samplingRate, w.window);
   timeGraph.showInJFrame(w.toString() + " in time domain", true, false);
   double[] fftSignal = new double[fftSize];
   // fftSignal should integrate to one, so normalise amplitudes:
   double sum = MathUtils.sum(w.window);
   for (int i = 0; i < w.window.length; i++) {
     fftSignal[i] = w.window[i] / sum;
   }
   LogSpectrum freqGraph = new LogSpectrum(fftSignal, samplingRate);
   freqGraph.showInJFrame(w.toString() + " log frequency response", true, false);
 }
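
The parameters above are read with Integer.getInteger, i.e. from JVM system properties, so a plausible invocation looks like this (the main class name is assumed for illustration):

  java -Dsamplingrate=16000 -Dwindowlength.samples=512 some.package.WindowMain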
Example #6
  public DoubleDataSource process(DoubleDataSource inputAudio) {
    amount = MathUtils.CheckLimits(amount, MIN_AMOUNT, MAX_AMOUNT);

    double[] vscales = {amount};

    int frameLength = SignalProcUtils.getDFTSize(fs);
    int predictionOrder = SignalProcUtils.getLPOrder(fs);

    VocalTractScalingProcessor p =
        new VocalTractScalingProcessor(predictionOrder, fs, frameLength, vscales);
    FrameOverlapAddSource foas =
        new FrameOverlapAddSource(inputAudio, Window.HANNING, true, frameLength, fs, p);

    return new BufferedDoubleDataSource(foas);
  }
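
A hedged usage sketch (the enclosing effect object, its fs and amount fields, and the input file are all assumed for illustration):

  // Hypothetical wiring: read a wav file, run it through the effect, collect the samples
  AudioInputStream ais = AudioSystem.getAudioInputStream(new File("in.wav"));
  DoubleDataSource input = new AudioDoubleDataSource(ais);
  DoubleDataSource output = effect.process(input); // 'effect' is a configured instance
  double[] processed = output.getAllData();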
Example #7
 public static void plotZoomed(double[] x, String strTitle, double minVal) {
   if (x != null) plotZoomed(x, strTitle, minVal, MathUtils.getMax(x));
 }
Example #8
 public static void plotZoomed(float[] x, String strTitle, double minVal) {
   if (x != null) plotZoomed(x, strTitle, minVal, MathUtils.getMax(x));
 }
Example #9
  // st: Sinusoidal tracks
  // absMaxDesired: Desired absolute maximum of the output
  public double[] synthesize(SinusoidalTracks st, boolean isSilentSynthesis) {
    int n; // discrete time index
    int i, j;
    int nStart, nEnd, pStart, pEnd;
    float t; // continuous time
    float t2; // continuous time squared
    float t3; // continuous time cubed

    float tFinal = st.getOriginalDuration();
    int nFinal = (int) (Math.floor(tFinal * st.fs + 0.5));
    double[] y = new double[nFinal + 1];
    Arrays.fill(y, 0.0);
    float currentAmp;
    float currentTheta;
    double alpha, beta;
    int M;
    // Number of samples between consecutive frames
    // (equal to the pitch period in pitch-synchronous analysis/synthesis)
    float T;
    float T2; // T squared
    float T3; // T cubed
    double oneOverTwoPi = 1.0 / MathUtils.TWOPI;
    double term1, term2;

    float currentTime; // For debugging purposes

    for (i = 0; i < st.totalTracks; i++) {
      for (j = 0; j < st.tracks[i].totalSins - 1; j++) {
        if (st.tracks[i].states[j] != SinusoidalTrack.TURNED_OFF) {
          pStart = (int) Math.floor(st.tracks[i].times[j] * st.fs + 0.5);
          pEnd = (int) Math.floor(st.tracks[i].times[j + 1] * st.fs + 0.5);

          nStart = Math.max(0, pStart);
          nEnd = Math.max(0, pEnd);
          nStart = Math.min(y.length - 1, nStart);
          nEnd = Math.min(y.length - 1, nEnd);

          // currentTime = 0.5f*(nEnd+nStart)/st.fs;
          // System.out.println("currentTime=" + String.valueOf(currentTime));

          // Set to true for direct (non-interpolated) synthesis:
          final boolean directSynthesis = false;
          for (n = nStart; n < nEnd; n++) {
            if (directSynthesis) // Direct synthesis
            {
              currentAmp = st.tracks[i].amps[j];
              currentTheta = (n - nStart) * st.tracks[i].freqs[j] + st.tracks[i].phases[j];
              y[n] += currentAmp * Math.cos(currentTheta);
            } else // Synthesis with interpolation
            {
              // Amplitude interpolation
              currentAmp =
                  st.tracks[i].amps[j]
                      + (st.tracks[i].amps[j + 1] - st.tracks[i].amps[j])
                          * ((float) n - pStart)
                          / (pEnd - pStart + 1);

              T = (pEnd - pStart);

              if (n == nStart
                  && st.tracks[i].states[j] == SinusoidalTrack.TURNED_ON) // Turning on a track
              {
                // Quatieri
                currentTheta = st.tracks[i].phases[j + 1] - T * st.tracks[i].freqs[j + 1];
                currentAmp = 0.0f;
              } else if (n == nStart
                  && st.tracks[i].states[j] == SinusoidalTrack.TURNED_OFF
                  && j > 0) // Turning off a track
              {
                // Quatieri
                currentTheta = st.tracks[i].phases[j - 1] + T * st.tracks[i].freqs[j - 1];
                currentAmp = 0.0f;
              } else // Cubic phase interpolation
              {
                // Quatieri
                M =
                    (int)
                        (Math.floor(
                            oneOverTwoPi
                                    * ((st.tracks[i].phases[j]
                                            + T * st.tracks[i].freqs[j]
                                            - st.tracks[i].phases[j + 1])
                                        + (st.tracks[i].freqs[j + 1] - st.tracks[i].freqs[j])
                                            * 0.5
                                            * T)
                                + 0.5));
                term1 =
                    st.tracks[i].phases[j + 1]
                        - st.tracks[i].phases[j]
                        - T * st.tracks[i].freqs[j]
                        + M * MathUtils.TWOPI;
                term2 = st.tracks[i].freqs[j + 1] - st.tracks[i].freqs[j];

                T2 = T * T;
                T3 = T * T2;
                alpha = 3.0 * term1 / T2 - term2 / T;
                beta = -2 * term1 / T3 + term2 / T2;

                t = ((float) n - nStart);
                t2 = t * t;
                t3 = t * t2;

                // Quatieri
                currentTheta =
                    (float)
                        (st.tracks[i].phases[j]
                            + st.tracks[i].freqs[j] * t
                            + alpha * t2
                            + beta * t3);
              }

              // Synthesis
              y[n] += currentAmp * Math.cos(currentTheta);
            }

            // System.out.println(String.valueOf(currentTheta));
          }
        }
      }

      if (!isSilentSynthesis)
        System.out.println(
            "Synthesized track " + String.valueOf(i + 1) + " of " + String.valueOf(st.totalTracks));
    }

    y = MathUtils.multiply(y, st.absMaxOriginal / MathUtils.getAbsMax(y));

    return y;
  }
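
For reference, the cubic phase interpolation above follows Quatieri: with boundary phases \theta_j, \theta_{j+1}, frequencies \omega_j, \omega_{j+1} (radians per sample), frame length T samples, and the unwrapping integer

  M = \left\lfloor \frac{1}{2\pi}\Big[(\theta_j + \omega_j T - \theta_{j+1}) + (\omega_{j+1} - \omega_j)\frac{T}{2}\Big] + \frac{1}{2} \right\rfloor,

the phase at offset t = n - nStart is

  \theta(t) = \theta_j + \omega_j t + \alpha t^2 + \beta t^3, with \alpha = \frac{3\,term1}{T^2} - \frac{term2}{T} and \beta = -\frac{2\,term1}{T^3} + \frac{term2}{T^2},

where term1 = \theta_{j+1} - \theta_j - \omega_j T + 2\pi M and term2 = \omega_{j+1} - \omega_j, exactly the term1/term2 computed in the code.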
Example #10
  public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
    // File input
    AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[0]));
    int samplingRate = (int) inputAudio.getFormat().getSampleRate();
    AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
    double[] x = signal.getAllData();
    double maxOrig = MathUtils.getAbsMax(x);

    SinusoidalAnalyzer sa = null;
    SinusoidalTracks st = null;
    PitchSynchronousSinusoidalAnalyzer pa = null;
    //

    // Analysis
    float deltaInHz = SinusoidalAnalysisParams.DEFAULT_DELTA_IN_HZ;
    float numPeriods = PitchSynchronousSinusoidalAnalyzer.DEFAULT_ANALYSIS_PERIODS;

    boolean isSilentSynthesis = false;

    int windowType = Window.HANNING;

    boolean bRefinePeakEstimatesParabola = false;
    boolean bRefinePeakEstimatesBias = false;
    boolean bSpectralReassignment = false;
    boolean bAdjustNeighFreqDependent = false;

    // int spectralEnvelopeType = SinusoidalAnalysisParams.LP_SPEC;
    int spectralEnvelopeType = SinusoidalAnalysisParams.SEEVOC_SPEC;
    float[] initialPeakLocationsInHz = {350.0f};

    boolean isFixedRateAnalysis = false;
    boolean isRealSpeech = true;
    double startFreqInHz = 0.0;
    double endFreqInHz = 0.5 * samplingRate;

    SinusoidalAnalysisParams params =
        new SinusoidalAnalysisParams(
            samplingRate,
            startFreqInHz,
            endFreqInHz,
            windowType,
            bRefinePeakEstimatesParabola,
            bRefinePeakEstimatesBias,
            bSpectralReassignment,
            bAdjustNeighFreqDependent);

    if (isFixedRateAnalysis) {
      // Fixed window size and skip rate analysis
      double[] f0s = null;
      float ws_f0 = -1.0f;
      float ss_f0 = -1.0f;
      sa = new SinusoidalAnalyzer(params);

      if (spectralEnvelopeType == SinusoidalAnalysisParams.SEEVOC_SPEC) // Pitch info needed
      {
        String strPitchFile = args[0].substring(0, args[0].length() - 4) + ".ptc";
        PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
        f0s = f0.contour;
        ws_f0 = (float) f0.header.windowSizeInSeconds;
        ss_f0 = (float) f0.header.skipSizeInSeconds;
      }

      st =
          sa.analyzeFixedRate(
              x, 0.020f, 0.010f, deltaInHz, spectralEnvelopeType, f0s, ws_f0, ss_f0);
      //
    } else {
      // Pitch synchronous analysis
      String strPitchFile = args[0].substring(0, args[0].length() - 4) + ".ptc";
      PitchReaderWriter f0 = new PitchReaderWriter(strPitchFile);
      int pitchMarkOffset = 0;
      PitchMarks pm =
          SignalProcUtils.pitchContour2pitchMarks(
              f0.contour,
              samplingRate,
              x.length,
              f0.header.windowSizeInSeconds,
              f0.header.skipSizeInSeconds,
              true,
              pitchMarkOffset);
      pa = new PitchSynchronousSinusoidalAnalyzer(params);

      st =
          pa.analyzePitchSynchronous(
              x, pm, numPeriods, -1.0f, deltaInHz, spectralEnvelopeType, initialPeakLocationsInHz);
      isSilentSynthesis = false;
    }
    //

    // Resynthesis
    PeakMatchedSinusoidalSynthesizer ss = new PeakMatchedSinusoidalSynthesizer(samplingRate);
    x = ss.synthesize(st, isSilentSynthesis);
    //

    // File output
    DDSAudioInputStream outputAudio =
        new DDSAudioInputStream(new BufferedDoubleDataSource(x), inputAudio.getFormat());
    String outFileName =
        args[0].substring(0, args[0].length() - 4) + "_sinResynthFullbandPitchSynch.wav";
    AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
    //
  }
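
Note, inferred from the code rather than stated here: the pitch contour is expected in a .ptc file obtained by swapping the input's extension, so a run might look like this (main class name assumed):

  java some.package.SinusoidalResynthMain speech.wav   # expects speech.ptc alongside; writes speech_sinResynthFullbandPitchSynch.wav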
Example #11
  // Pseudo-harmonic based noise generation for pseudo-periods
  public static double[] synthesize(
      HntmSpeechSignal hnmSignal,
      HntmAnalyzerParams analysisParams,
      HntmSynthesizerParams synthesisParams,
      String referenceFile) {
    double[] noisePart = null;
    int trackNoToExamine = 1;

    int i, k, n;
    double t; // Time in seconds

    double tsik = 0.0; // Synthesis time in seconds
    double tsikPlusOne = 0.0; // Synthesis time in seconds

    double trackStartInSeconds, trackEndInSeconds;
    // double lastPeriodInSeconds = 0.0;
    int trackStartIndex, trackEndIndex;
    double akt;
    int numHarmonicsCurrentFrame, numHarmonicsPrevFrame, numHarmonicsNextFrame;
    int harmonicIndexShiftPrev, harmonicIndexShiftCurrent, harmonicIndexShiftNext;
    int maxNumHarmonics = 0;
    for (i = 0; i < hnmSignal.frames.length; i++) {
      if (hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f
          && hnmSignal.frames[i].n != null) {
        numHarmonicsCurrentFrame =
            (int) Math.floor(hnmSignal.samplingRateInHz / analysisParams.noiseF0InHz + 0.5);
        numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
        if (numHarmonicsCurrentFrame > maxNumHarmonics) maxNumHarmonics = numHarmonicsCurrentFrame;
      }
    }

    double aksi;
    double aksiPlusOne;

    float[] phasekis = null;
    float phasekiPlusOne;

    double ht;
    float phasekt = 0.0f;

    float phasekiEstimate = 0.0f;
    float phasekiPlusOneEstimate = 0.0f;
    int Mk;
    boolean isPrevNoised, isNoised, isNextNoised;
    boolean isTrackNoised, isNextTrackNoised, isPrevTrackNoised;
    int outputLen =
        SignalProcUtils.time2sample(
            hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);

    // Note: this should become the prosody-scaled length once prosody modifications are implemented
    noisePart = new double[outputLen];
    Arrays.fill(noisePart, 0.0);

    // Buffers for the individual noise tracks (optionally written out separately below)
    double[][] noiseTracks = null;

    if (maxNumHarmonics > 0) {
      noiseTracks = new double[maxNumHarmonics][];
      for (k = 0; k < maxNumHarmonics; k++) {
        noiseTracks[k] = new double[outputLen];
        Arrays.fill(noiseTracks[k], 0.0);
      }

      phasekis = new float[maxNumHarmonics];
      for (k = 0; k < maxNumHarmonics; k++)
        phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
    }
    //

    int transitionLen =
        SignalProcUtils.time2sample(
            synthesisParams.unvoicedVoicedTrackTransitionInSeconds, hnmSignal.samplingRateInHz);
    Window transitionWin = Window.get(Window.HAMMING, transitionLen * 2);
    transitionWin.normalizePeakValue(1.0f);
    double[] halfTransitionWinLeft = transitionWin.getCoeffsLeftHalf();
    float halfFs = 0.5f * hnmSignal.samplingRateInHz; // Nyquist frequency

    for (i = 0; i < hnmSignal.frames.length; i++) {
      isPrevNoised = false;
      isNoised = false;
      isNextNoised = false;

      if (i > 0
          && hnmSignal.frames[i - 1].n != null
          && hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < halfFs
          && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i - 1].n).ceps != null)
        isPrevNoised = true;

      if (i > 0
          && hnmSignal.frames[i].n != null
          && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < halfFs
          && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps != null) isNoised = true;

      if (i < hnmSignal.frames.length - 1
          && hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < halfFs
          && hnmSignal.frames[i + 1].n != null
          && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps != null)
        isNextNoised = true;

      numHarmonicsPrevFrame = 0;
      numHarmonicsCurrentFrame = 0;
      numHarmonicsNextFrame = 0;
      harmonicIndexShiftPrev = 0;
      harmonicIndexShiftCurrent = 0;
      harmonicIndexShiftNext = 0;

      if (isPrevNoised) {
        numHarmonicsPrevFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz
                                - hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsPrevFrame = Math.max(0, numHarmonicsPrevFrame);
        harmonicIndexShiftPrev =
            (int)
                Math.floor(
                    hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz
                            / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftPrev = Math.max(1, harmonicIndexShiftPrev);
      }

      if (isNoised) {
        numHarmonicsCurrentFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz - hnmSignal.frames[i].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
        harmonicIndexShiftCurrent =
            (int)
                Math.floor(
                    hnmSignal.frames[i].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
      } else if (!isNoised && isNextNoised) {
        numHarmonicsCurrentFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz
                                - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
        harmonicIndexShiftCurrent =
            (int)
                Math.floor(
                    hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz
                            / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
      }

      if (isNextNoised) {
        numHarmonicsNextFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz
                                - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsNextFrame = Math.max(0, numHarmonicsNextFrame);
        harmonicIndexShiftNext =
            (int)
                Math.floor(
                    hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz
                            / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftNext = Math.max(1, harmonicIndexShiftNext);
      }

      for (k = 0; k < numHarmonicsCurrentFrame; k++) {
        aksi = 0.0;
        aksiPlusOne = 0.0;

        phasekiPlusOne = 0.0f;

        isPrevTrackNoised = false;
        isTrackNoised = false;
        isNextTrackNoised = false;

        if (i > 0 && hnmSignal.frames[i - 1].n != null && numHarmonicsPrevFrame > k)
          isPrevTrackNoised = true;

        if (hnmSignal.frames[i].n != null && numHarmonicsCurrentFrame > k) isTrackNoised = true;

        if (i < hnmSignal.frames.length - 1
            && hnmSignal.frames[i + 1].n != null
            && numHarmonicsNextFrame > k) isNextTrackNoised = true;

        tsik = hnmSignal.frames[i].tAnalysisInSeconds;

        if (i == 0) trackStartInSeconds = 0.0;
        else trackStartInSeconds = tsik;

        if (i == hnmSignal.frames.length - 1) tsikPlusOne = hnmSignal.originalDurationInSeconds;
        else tsikPlusOne = hnmSignal.frames[i + 1].tAnalysisInSeconds;

        trackEndInSeconds = tsikPlusOne;

        trackStartIndex =
            SignalProcUtils.time2sample(trackStartInSeconds, hnmSignal.samplingRateInHz);
        trackEndIndex = SignalProcUtils.time2sample(trackEndInSeconds, hnmSignal.samplingRateInHz);

        if (isTrackNoised && trackEndIndex - trackStartIndex + 1 > 0) {
          // Amplitudes
          if (isTrackNoised) {
            if (!analysisParams.useNoiseAmplitudesDirectly) {
              if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
                aksi =
                    RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                        (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
              else if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
                aksi =
                    RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                        (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
            } else {
              if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps.length)
                aksi =
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n)
                        .ceps[k]; // Use amplitudes directly without cepstrum method
              else aksi = 0.0;
            }
          } else aksi = 0.0;

          if (isNextTrackNoised) {
            if (!analysisParams.useNoiseAmplitudesDirectly) {
              if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
                aksiPlusOne =
                    RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                        (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
              else if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
                aksiPlusOne =
                    RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                        (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
            } else {
              if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps.length)
                aksiPlusOne =
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n)
                        .ceps[k]; // Use amplitudes directly without cepstrum method
              else aksiPlusOne = 0.0;
            }
          } else aksiPlusOne = 0.0;
          //

          // Phases
          phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
          phasekiPlusOne =
              (float)
                  (phasekis[k]
                      + (k + harmonicIndexShiftCurrent)
                          * MathUtils.TWOPI
                          * analysisParams.noiseF0InHz
                          * (tsikPlusOne - tsik)); // Equation (3.55)
          //

          if (!isPrevTrackNoised) trackStartIndex = Math.max(0, trackStartIndex - transitionLen);

          for (n = trackStartIndex; n <= Math.min(trackEndIndex, outputLen - 1); n++) {
            t = SignalProcUtils.sample2time(n, hnmSignal.samplingRateInHz);

            // if (t>=tsik && t<tsikPlusOne)
            {
              // Amplitude estimate
              akt = MathUtils.interpolatedSample(tsik, t, tsikPlusOne, aksi, aksiPlusOne);
              //

              // Phase estimate
              phasekt = (float) (phasekiPlusOne * (t - tsik) / (tsikPlusOne - tsik));
              //

              if (!isPrevTrackNoised && n - trackStartIndex < transitionLen)
                noiseTracks[k][n] =
                    halfTransitionWinLeft[n - trackStartIndex] * akt * Math.cos(phasekt);
              else noiseTracks[k][n] = akt * Math.cos(phasekt);
            }
          }

          phasekis[k] = phasekiPlusOne;
        }
      }
    }

    // Accumulate the individual tracks into the overall noise part
    // (noiseTracks remains null when maxNumHarmonics == 0, hence the guard)
    if (noiseTracks != null) {
      for (k = 0; k < noiseTracks.length; k++) {
        for (n = 0; n < noisePart.length; n++) noisePart[n] += noiseTracks[k][n];
      }

      if (referenceFile != null
          && FileUtils.exists(referenceFile)
          && synthesisParams.writeSeparateHarmonicTracksToOutputs) {
        // Write separate tracks to output
        AudioInputStream inputAudio = null;
        try {
          inputAudio = AudioSystem.getAudioInputStream(new File(referenceFile));
        } catch (UnsupportedAudioFileException | IOException e) {
          e.printStackTrace();
        }

        if (inputAudio != null) {
          // k=1;
          for (k = 0; k < noiseTracks.length; k++) {
            noiseTracks[k] = MathUtils.divide(noiseTracks[k], 32767.0);

            DDSAudioInputStream outputAudio =
                new DDSAudioInputStream(
                    new BufferedDoubleDataSource(noiseTracks[k]), inputAudio.getFormat());
            String outFileName =
                StringUtils.getFolderName(referenceFile)
                    + "noiseTrack"
                    + String.valueOf(k + 1)
                    + ".wav";
            try {
              AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
            } catch (IOException e) {
              e.printStackTrace();
            }
          }
        }
      }
      //
    }

    return noisePart;
  }
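
The phase update tagged "Equation (3.55)" in the code advances each pseudo-harmonic linearly at its frequency between synthesis instants:

  \phi_k(t_{s,i+1}) = \phi_k(t_{s,i}) + 2\pi\,(k + k_0)\, f_{0,noise}\, (t_{s,i+1} - t_{s,i}),

where k_0 is harmonicIndexShiftCurrent and f_{0,noise} is analysisParams.noiseF0InHz; within the frame, the phase at sample time t is then interpolated linearly via the factor (t - t_{s,i}) / (t_{s,i+1} - t_{s,i}), as in the phasekt line.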
Example #12
  @Override
  public boolean compute() throws IOException, MaryConfigurationException {
    logger.info("Duration tree trainer started.");
    FeatureFileReader featureFile = FeatureFileReader.getFeatureFileReader(getProp(FEATUREFILE));
    UnitFileReader unitFile = new UnitFileReader(getProp(UNITFILE));

    FeatureVector[] allFeatureVectors = featureFile.getFeatureVectors();
    int maxData = Integer.parseInt(getProp(MAXDATA));
    if (maxData == 0) maxData = allFeatureVectors.length;
    FeatureVector[] featureVectors = new FeatureVector[Math.min(maxData, allFeatureVectors.length)];
    System.arraycopy(allFeatureVectors, 0, featureVectors, 0, featureVectors.length);
    logger.debug(
        "Total of "
            + allFeatureVectors.length
            + " feature vectors -- will use "
            + featureVectors.length);

    AgglomerativeClusterer clusterer =
        new AgglomerativeClusterer(
            featureVectors,
            featureFile.getFeatureDefinition(),
            null,
            new DurationDistanceMeasure(unitFile),
            Float.parseFloat(getProp(PROPORTIONTESTDATA)));
    DirectedGraphWriter writer = new DirectedGraphWriter();
    DirectedGraph graph;
    int iteration = 0;
    do {
      graph = clusterer.cluster();
      iteration++;
      if (graph != null) {
        writer.saveGraph(graph, getProp(DURTREE) + ".level" + iteration);
      }
    } while (clusterer.canClusterMore());

    if (graph == null) {
      return false;
    }

    // Now replace each leaf with a FloatLeafNode containing mean and stddev
    for (LeafNode leaf : graph.getLeafNodes()) {
      FeatureVectorLeafNode fvLeaf = (FeatureVectorLeafNode) leaf;
      FeatureVector[] fvs = fvLeaf.getFeatureVectors();
      double[] dur = new double[fvs.length];
      for (int i = 0; i < fvs.length; i++) {
        dur[i] =
            unitFile.getUnit(fvs[i].getUnitIndex()).duration / (float) unitFile.getSampleRate();
      }
      double mean = MathUtils.mean(dur);
      double stddev = MathUtils.standardDeviation(dur, mean);
      FloatLeafNode floatLeaf = new FloatLeafNode(new float[] {(float) stddev, (float) mean});
      Node mother = fvLeaf.getMother();
      assert mother != null;
      if (mother.isDecisionNode()) {
        ((DecisionNode) mother).replaceDaughter(floatLeaf, fvLeaf.getNodeIndex());
      } else {
        assert mother.isDirectedGraphNode();
        assert ((DirectedGraphNode) mother).getLeafNode() == fvLeaf;
        ((DirectedGraphNode) mother).setLeafNode(floatLeaf);
      }
    }
    writer.saveGraph(graph, getProp(DURTREE));
    return true;
  }
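
A reading-side sketch of the leaf payload written above (the accessor name is assumed; the array stores the standard deviation first, then the mean):

  float[] data = floatLeaf.getFloatData(); // assumed accessor on FloatLeafNode
  float stddev = data[0];
  float mean = data[1];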