Example #1
0
 /**
  * For the voice with the given name, return the list of vocalizations supported by this voice,
  * one vocalization per line. These values can be used in the "name" attribute of the vocalization
  * tag.
  *
  * @param voiceName name of the voice to look up
  * @return the list of vocalizations, or the empty string if the voice does not support
  *     vocalizations.
  */
 public static String getVocalizations(String voiceName) {
   Voice voice = Voice.getVoice(voiceName);
   // Unknown voice or no vocalization support: nothing to report.
   if (voice != null && voice.hasVocalizationSupport()) {
     VocalizationSynthesizer synthesizer = voice.getVocalizationSynthesizer();
     assert synthesizer != null;
     String[] available = synthesizer.listAvailableVocalizations();
     assert available != null;
     return StringUtils.toString(available);
   }
   return "";
 }
Example #2
0
 /**
  * Set the content data of this MaryData object from the given String. For XML data ({@link
  * MaryDataType#isXMLType()}), parse the String representation of the data into a DOM tree.
  *
  * @param dataString string representation of the input data.
  * @throws ParserConfigurationException ParserConfigurationException
  * @throws IOException IOException
  * @throws SAXException SAXException
  * @throws IllegalArgumentException if this method is called for MaryDataTypes that are neither
  *     text nor XML.
  */
 public void setData(String dataString)
     throws ParserConfigurationException, SAXException, IOException {
   // First, some data cleanup:
   dataString = StringUtils.purgeNonBreakingSpaces(dataString);
   // Now, deal with it.
   if (type.isXMLType()) {
     // Guard the debug message: it embeds the full input string, which can be large,
     // so avoid the string concatenation unless debug logging is actually enabled.
     if (logger.isDebugEnabled()) {
       logger.debug(
           "Parsing XML input (" + (doValidate ? "" : "non-") + "validating): " + dataString);
     }
     xmlDocument = DomUtils.parseDocument(dataString, doValidate);
   } else if (type.isTextType()) {
     if (logger.isDebugEnabled()) {
       logger.debug("Setting text input: " + dataString);
     }
     plainText = dataString;
   } else {
     throw new IllegalArgumentException("Cannot set data of type " + type + " from a string");
   }
 }
Example #3
0
  /**
   * Return example texts for the default voice, one entry per example. First tries the
   * voice-specific example text; if none is available, falls back to the general-domain "TEXT"
   * example for the voice's locale.
   *
   * @return the example texts, or null if none could be found.
   */
  public static Vector<String> getDefaultVoiceExampleTexts() {
    String defaultVoiceName = getDefaultVoiceName();
    Vector<String> defaultVoiceExampleTexts =
        StringUtils.processVoiceExampleText(getVoiceExampleText(defaultVoiceName));
    if (defaultVoiceExampleTexts == null) { // Try for general domain
      // Guard against an unknown voice name: Voice.getVoice may return null, and
      // dereferencing it for the locale would throw a NullPointerException.
      Voice defaultVoice = Voice.getVoice(defaultVoiceName);
      if (defaultVoice != null) {
        String str = getExampleText("TEXT", defaultVoice.getLocale());
        if (str != null && str.length() > 0) {
          defaultVoiceExampleTexts = new Vector<String>();
          defaultVoiceExampleTexts.add(str);
        }
      }
    }

    return defaultVoiceExampleTexts;
  }
  /**
   * Pseudo-harmonics based noise generation for pseudo periods: synthesizes the noise part of an
   * HNM speech signal by summing cosine tracks. Track amplitudes are either read directly from
   * each frame's noise cepstrum array or derived from it via a regularized (pre-Bark or post-Mel
   * warped) cepstrum-to-spectrum conversion; track phases are randomized per frame.
   *
   * @param hnmSignal the analyzed HNM signal whose frames carry the noise-part data
   * @param analysisParams analysis parameters (noise pseudo-F0, cepstrum warping method, whether
   *     to use noise amplitudes directly)
   * @param synthesisParams synthesis parameters (unvoiced/voiced transition length, whether to
   *     write separate harmonic tracks to output files)
   * @param referenceFile optional reference audio file; when it exists and
   *     writeSeparateHarmonicTracksToOutputs is set, each noise track is written as a wav next to
   *     it (used for debugging/inspection)
   * @return the synthesized noise part, sized to the original signal duration
   */
  public static double[] synthesize(
      HntmSpeechSignal hnmSignal,
      HntmAnalyzerParams analysisParams,
      HntmSynthesizerParams synthesisParams,
      String referenceFile) {
    double[] noisePart = null;
    int trackNoToExamine = 1;

    int i, k, n;
    double t; // Time in seconds

    double tsik = 0.0; // Synthesis time in seconds
    double tsikPlusOne = 0.0; // Synthesis time in seconds

    double trackStartInSeconds, trackEndInSeconds;
    // double lastPeriodInSeconds = 0.0;
    int trackStartIndex, trackEndIndex;
    double akt;
    int numHarmonicsCurrentFrame, numHarmonicsPrevFrame, numHarmonicsNextFrame;
    int harmonicIndexShiftPrev, harmonicIndexShiftCurrent, harmonicIndexShiftNext;
    int maxNumHarmonics = 0;
    // Determine the maximum number of pseudo-harmonic tracks needed over all noised frames,
    // so the per-track buffers below can be allocated once.
    for (i = 0; i < hnmSignal.frames.length; i++) {
      if (hnmSignal.frames[i].maximumFrequencyOfVoicingInHz > 0.0f
          && hnmSignal.frames[i].n != null) {
        numHarmonicsCurrentFrame =
            (int) Math.floor(hnmSignal.samplingRateInHz / analysisParams.noiseF0InHz + 0.5);
        numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
        if (numHarmonicsCurrentFrame > maxNumHarmonics) maxNumHarmonics = numHarmonicsCurrentFrame;
      }
    }

    double aksi;
    double aksiPlusOne;

    float[] phasekis = null;
    float phasekiPlusOne;

    double ht;
    float phasekt = 0.0f;

    float phasekiEstimate = 0.0f;
    float phasekiPlusOneEstimate = 0.0f;
    int Mk;
    boolean isPrevNoised, isNoised, isNextNoised;
    boolean isTrackNoised, isNextTrackNoised, isPrevTrackNoised;
    int outputLen =
        SignalProcUtils.time2sample(
            hnmSignal.originalDurationInSeconds, hnmSignal.samplingRateInHz);

    noisePart =
        new double
            [outputLen]; // In fact, this should be prosody scaled length when you implement prosody
                         // modifications
    Arrays.fill(noisePart, 0.0);

    // Per-track output buffers; stays null when no frame produced any harmonics.
    double[][] noiseTracks = null;

    if (maxNumHarmonics > 0) {
      noiseTracks = new double[maxNumHarmonics][];
      for (k = 0; k < maxNumHarmonics; k++) {
        noiseTracks[k] = new double[outputLen];
        Arrays.fill(noiseTracks[k], 0.0);
      }

      // Random initial phase in (-pi, pi) per track.
      phasekis = new float[maxNumHarmonics];
      for (k = 0; k < maxNumHarmonics; k++)
        phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
    }
    //

    int transitionLen =
        SignalProcUtils.time2sample(
            synthesisParams.unvoicedVoicedTrackTransitionInSeconds, hnmSignal.samplingRateInHz);
    Window transitionWin = Window.get(Window.HAMMING, transitionLen * 2);
    transitionWin.normalizePeakValue(1.0f);
    double[] halfTransitionWinLeft = transitionWin.getCoeffsLeftHalf();
    // NOTE(review): despite the name, halfFs holds the FULL sampling rate here
    // (no 0.5 factor) — confirm whether 0.5f * samplingRateInHz was intended.
    float halfFs = hnmSignal.samplingRateInHz;

    for (i = 0; i < hnmSignal.frames.length; i++) {
      isPrevNoised = false;
      isNoised = false;
      isNextNoised = false;

      if (i > 0
          && hnmSignal.frames[i - 1].n != null
          && hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz < halfFs
          && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i - 1].n).ceps != null)
        isPrevNoised = true;

      // NOTE(review): the i > 0 condition means frame 0 is never treated as noised even
      // though this test only reads frame i — confirm that is intended.
      if (i > 0
          && hnmSignal.frames[i].n != null
          && hnmSignal.frames[i].maximumFrequencyOfVoicingInHz < halfFs
          && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps != null) isNoised = true;

      if (i < hnmSignal.frames.length - 1
          && hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz < halfFs
          && hnmSignal.frames[i + 1].n != null
          && ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps != null)
        isNextNoised = true;

      numHarmonicsPrevFrame = 0;
      numHarmonicsCurrentFrame = 0;
      numHarmonicsNextFrame = 0;
      harmonicIndexShiftPrev = 0;
      harmonicIndexShiftCurrent = 0;
      harmonicIndexShiftNext = 0;

      // Number of noise pseudo-harmonics above the maximum voicing frequency, and the index
      // shift that maps track index k to its absolute harmonic number.
      if (isPrevNoised) {
        numHarmonicsPrevFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz
                                - hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsPrevFrame = Math.max(0, numHarmonicsPrevFrame);
        harmonicIndexShiftPrev =
            (int)
                Math.floor(
                    hnmSignal.frames[i - 1].maximumFrequencyOfVoicingInHz
                            / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftPrev = Math.max(1, harmonicIndexShiftPrev);
      }

      if (isNoised) {
        numHarmonicsCurrentFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz - hnmSignal.frames[i].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
        harmonicIndexShiftCurrent =
            (int)
                Math.floor(
                    hnmSignal.frames[i].maximumFrequencyOfVoicingInHz / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
      } else if (!isNoised && isNextNoised) {
        // Current frame is not noised but the next one is: borrow the next frame's harmonic
        // layout so the track can fade in towards it.
        numHarmonicsCurrentFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz
                                - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsCurrentFrame = Math.max(0, numHarmonicsCurrentFrame);
        harmonicIndexShiftCurrent =
            (int)
                Math.floor(
                    hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz
                            / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftCurrent = Math.max(1, harmonicIndexShiftCurrent);
      }

      if (isNextNoised) {
        numHarmonicsNextFrame =
            (int)
                Math.floor(
                    (hnmSignal.samplingRateInHz
                                - hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz)
                            / analysisParams.noiseF0InHz
                        + 0.5);
        numHarmonicsNextFrame = Math.max(0, numHarmonicsNextFrame);
        harmonicIndexShiftNext =
            (int)
                Math.floor(
                    hnmSignal.frames[i + 1].maximumFrequencyOfVoicingInHz
                            / analysisParams.noiseF0InHz
                        + 0.5);
        harmonicIndexShiftNext = Math.max(1, harmonicIndexShiftNext);
      }

      for (k = 0; k < numHarmonicsCurrentFrame; k++) {
        aksi = 0.0;
        aksiPlusOne = 0.0;

        phasekiPlusOne = 0.0f;

        isPrevTrackNoised = false;
        isTrackNoised = false;
        isNextTrackNoised = false;

        if (i > 0 && hnmSignal.frames[i - 1].n != null && numHarmonicsPrevFrame > k)
          isPrevTrackNoised = true;

        if (hnmSignal.frames[i].n != null && numHarmonicsCurrentFrame > k) isTrackNoised = true;

        if (i < hnmSignal.frames.length - 1
            && hnmSignal.frames[i + 1].n != null
            && numHarmonicsNextFrame > k) isNextTrackNoised = true;

        tsik = hnmSignal.frames[i].tAnalysisInSeconds;

        if (i == 0) trackStartInSeconds = 0.0;
        else trackStartInSeconds = tsik;

        if (i == hnmSignal.frames.length - 1) tsikPlusOne = hnmSignal.originalDurationInSeconds;
        else tsikPlusOne = hnmSignal.frames[i + 1].tAnalysisInSeconds;

        trackEndInSeconds = tsikPlusOne;

        trackStartIndex =
            SignalProcUtils.time2sample(trackStartInSeconds, hnmSignal.samplingRateInHz);
        trackEndIndex = SignalProcUtils.time2sample(trackEndInSeconds, hnmSignal.samplingRateInHz);

        if (isTrackNoised && trackEndIndex - trackStartIndex + 1 > 0) {
          // Amplitudes
          if (isTrackNoised) {
            if (!analysisParams.useNoiseAmplitudesDirectly) {
              if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
                aksi =
                    RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                        (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
              else if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
                aksi =
                    RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps,
                        (k + harmonicIndexShiftCurrent) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
            } else {
              if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n).ceps.length)
                aksi =
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i].n)
                        .ceps[k]; // Use amplitudes directly without cepstrum method
              else aksi = 0.0;
            }
          } else aksi = 0.0;

          if (isNextTrackNoised) {
            if (!analysisParams.useNoiseAmplitudesDirectly) {
              if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING)
                aksiPlusOne =
                    RegularizedPreWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                        (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
              else if (analysisParams.regularizedCepstrumWarpingMethod
                  == RegularizedCepstrumEstimator.REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING)
                aksiPlusOne =
                    RegularizedPostWarpedCepstrumEstimator.cepstrum2linearSpectrumValue(
                        ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps,
                        (k + harmonicIndexShiftNext) * analysisParams.noiseF0InHz,
                        hnmSignal.samplingRateInHz);
            } else {
              if (k < ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n).ceps.length)
                aksiPlusOne =
                    ((FrameNoisePartPseudoHarmonic) hnmSignal.frames[i + 1].n)
                        .ceps[k]; // Use amplitudes directly without cepstrum method
              else aksiPlusOne = 0.0;
            }
          } else aksiPlusOne = 0.0;
          //

          // Phases
          phasekis[k] = (float) (MathUtils.TWOPI * (Math.random() - 0.5));
          phasekiPlusOne =
              (float)
                  (phasekis[k]
                      + (k + harmonicIndexShiftCurrent)
                          * MathUtils.TWOPI
                          * analysisParams.noiseF0InHz
                          * (tsikPlusOne - tsik)); // Equation (3.55)
          //

          if (!isPrevTrackNoised) trackStartIndex = Math.max(0, trackStartIndex - transitionLen);

          for (n = trackStartIndex; n <= Math.min(trackEndIndex, outputLen - 1); n++) {
            t = SignalProcUtils.sample2time(n, hnmSignal.samplingRateInHz);

            // if (t>=tsik && t<tsikPlusOne)
            {
              // Amplitude estimate
              akt = MathUtils.interpolatedSample(tsik, t, tsikPlusOne, aksi, aksiPlusOne);
              //

              // Phase estimate
              phasekt = (float) (phasekiPlusOne * (t - tsik) / (tsikPlusOne - tsik));
              //

              // Fade the track in with the left half of the Hamming window when the previous
              // frame did not carry this track.
              if (!isPrevTrackNoised && n - trackStartIndex < transitionLen)
                noiseTracks[k][n] =
                    halfTransitionWinLeft[n - trackStartIndex] * akt * Math.cos(phasekt);
              else noiseTracks[k][n] = akt * Math.cos(phasekt);
            }
          }

          phasekis[k] = phasekiPlusOne;
        }
      }
    }

    // Sum all tracks into the single noise signal. This must stay inside the null check
    // (noiseTracks is null when maxNumHarmonics == 0) and must run exactly once; a second,
    // unguarded copy of this loop would both NPE on silent signals and double the noise
    // amplitude otherwise.
    if (noiseTracks != null) {
      for (k = 0; k < noiseTracks.length; k++) {
        for (n = 0; n < noisePart.length; n++) noisePart[n] += noiseTracks[k][n];
      }

      if (referenceFile != null
          && FileUtils.exists(referenceFile)
          && synthesisParams.writeSeparateHarmonicTracksToOutputs) {
        // Write separate tracks to output
        AudioInputStream inputAudio = null;
        try {
          inputAudio = AudioSystem.getAudioInputStream(new File(referenceFile));
        } catch (UnsupportedAudioFileException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        } catch (IOException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }

        if (inputAudio != null) {
          // k=1;
          for (k = 0; k < noiseTracks.length; k++) {
            noiseTracks[k] = MathUtils.divide(noiseTracks[k], 32767.0);

            DDSAudioInputStream outputAudio =
                new DDSAudioInputStream(
                    new BufferedDoubleDataSource(noiseTracks[k]), inputAudio.getFormat());
            String outFileName =
                StringUtils.getFolderName(referenceFile)
                    + "noiseTrack"
                    + String.valueOf(k + 1)
                    + ".wav";
            try {
              AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
            } catch (IOException e) {
              // TODO Auto-generated catch block
              e.printStackTrace();
            }
          }
        }
      }
      //
    }

    return noisePart;
  }