/* * Group individual sinusoids into tracks by considering closeness in frequency * Current version is a simple implementation of checking the frequency difference between neighbouring * sinusoids and assigning them to same track if the absolute difference is less than a threshold * Possible ways to improve this process would be to employ: * - constraints on amplitude continuity * - constraints on phase continuity (i.e. the phase difference between two consecutive sinusoids * should not be larger or smaller than some percent of the period * * framesSins[i][] : Array of sinusoidal parameters (amps, freqs, phases) extracted from ith speech frame * framesSins[i][j]: Sinusoidal parameters of the jth peak sinusoid in the DFT spectrum of speech frame i * Returns a number of sinusoidal tracks * * This version uses a simple search mechanism to compare a current sinusoid frequecny with the previous and if the difference is smaller than * +-deltaInHz, assigns the new sinusoid to the previous sinusoid´s track * In the assignment, longer previous paths are favoured in a weighted manner, i.e. the longer a candidate track, * the more likely the current sinusoid gets assigned to that track * */ public SinusoidalTracks generateTracks( NonharmonicSinusoidalSpeechSignal sinSignal, float deltaInHz, int samplingRate) { int numFrames = sinSignal.framesSins.length; float deltaInRadians = SignalProcUtils.hz2radian(deltaInHz, samplingRate); SinusoidalTracks tr = null; int i; Sinusoid zeroAmpSin; if (numFrames > 0) { int j, k; float tmpDist, minDist; int trackInd; boolean[] bSinAssigneds = null; for (i = 0; i < numFrames; i++) { if (tr == null) // If no tracks yet, assign the current sinusoids to new tracks { tr = new SinusoidalTracks(sinSignal.framesSins[i].sinusoids.length, samplingRate); tr.setSysAmpsAndTimes(sinSignal.framesSins); for (j = 0; j < sinSignal.framesSins[i].sinusoids.length; j++) { // First add a zero amplitude sinusoid at previous time instant to allow smooth // synthesis (i.e. "turning on" the track) zeroAmpSin = new Sinusoid( 0.0f, sinSignal.framesSins[i].sinusoids[j].freq, 0.0f, Sinusoid.NON_EXISTING_FRAME_INDEX); tr.add( new SinusoidalTrack( sinSignal.framesSins[i].time - ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin, sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.TURNED_ON)); // tr.tracks[tr.currentIndex].add( sinSignal.framesSins[i].time, sinSignal.framesSins[i].sinusoids[j], sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.ACTIVE); } } else // If there are tracks, first check "continuations" by checking whether a given // sinusoid is in the +-deltaInRadians neighbourhood of the previous track. // Those tracks that do not continue are "turned off". // All sinusoids of the current frame that are not assigned to any of the "continuations" or // "turned off" are "birth"s of new tracks. { for (j = 0; j < tr.currentIndex + 1; j++) { if (tr.tracks[j] != null) tr.tracks[j].resetCandidate(); } bSinAssigneds = new boolean[sinSignal.framesSins[i].sinusoids.length]; // Continuations: for (k = 0; k < sinSignal.framesSins[i].sinusoids.length; k++) { minDist = Math.abs( sinSignal.framesSins[i].sinusoids[k].freq - tr.tracks[0].freqs[tr.tracks[0].currentIndex]); if (minDist < deltaInRadians) trackInd = 0; else trackInd = -1; for (j = 1; j < tr.currentIndex + 1; j++) { tmpDist = Math.abs( sinSignal.framesSins[i].sinusoids[k].freq - tr.tracks[j].freqs[tr.tracks[j].currentIndex]); if (tmpDist < deltaInRadians && (trackInd == -1 || tmpDist < minDist)) { minDist = tmpDist; trackInd = j; } } if (trackInd > -1) { if (tr.tracks[trackInd].newCandidateInd > -1) bSinAssigneds[tr.tracks[trackInd].newCandidateInd] = false; tr.tracks[trackInd].newCandidate = new Sinusoid(sinSignal.framesSins[i].sinusoids[k]); tr.tracks[trackInd].newCandidateInd = k; bSinAssigneds[k] = true; // The sinusoid might be assigned to an existing track provided that a // closer sinusoid is not found } else bSinAssigneds[k] = false; // This is the birth of a new track since it does not match any existing // tracks } // Here is the actual assignment of sinusoids to existing tracks for (j = 0; j < tr.currentIndex + 1; j++) { if (tr.tracks[j].newCandidate != null) { Sinusoid tmpSin = new Sinusoid(tr.tracks[j].newCandidate); if (tr.tracks[j].states[tr.tracks[j].currentIndex] != SinusoidalTrack.ACTIVE) { zeroAmpSin = new Sinusoid( 0.0f, tr.tracks[j].freqs[tr.tracks[j].totalSins - 1], 0.0f, Sinusoid.NON_EXISTING_FRAME_INDEX); tr.tracks[j].add( sinSignal.framesSins[i].time - ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin, sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.TURNED_ON); } tr.tracks[j].add( sinSignal.framesSins[i].time, tmpSin, sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.ACTIVE); } else // Turn off tracks that are not assigned any new sinusoid { if (tr.tracks[j].states[tr.tracks[j].currentIndex] != SinusoidalTrack.TURNED_OFF) { zeroAmpSin = new Sinusoid( 0.0f, tr.tracks[j].freqs[tr.tracks[j].totalSins - 1], 0.0f, Sinusoid.NON_EXISTING_FRAME_INDEX); tr.tracks[j].add( sinSignal.framesSins[i].time + ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin, sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.TURNED_OFF); } } } // Births: Create new tracks from sinusoids that are not assigned to existing tracks for (k = 0; k < bSinAssigneds.length; k++) { if (!bSinAssigneds[k]) { // First add a zero amplitude sinusoid to previous frame to allow smooth synthesis // (i.e. "turning on" the track) zeroAmpSin = new Sinusoid( 0.0f, sinSignal.framesSins[i].sinusoids[k].freq, 0.0f, Sinusoid.NON_EXISTING_FRAME_INDEX); tr.add( new SinusoidalTrack( sinSignal.framesSins[i].time - ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin, sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.TURNED_ON)); // tr.tracks[tr.currentIndex].add( sinSignal.framesSins[i].time, sinSignal.framesSins[i].sinusoids[k], sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.ACTIVE); } } System.out.println( "Track generation using frame " + String.valueOf(i + 1) + " of " + String.valueOf(numFrames)); } // Turn-off all active tracks after the last speech frame if (i == numFrames - 1) { for (j = 0; j < tr.currentIndex + 1; j++) { if (Math.abs( sinSignal.framesSins[i].time - tr.tracks[j].times[tr.tracks[j].totalSins - 1]) < ZERO_AMP_SHIFT_IN_SECONDS) { if (tr.tracks[j].states[tr.tracks[j].currentIndex] == SinusoidalTrack.ACTIVE) { zeroAmpSin = new Sinusoid( 0.0f, tr.tracks[j].freqs[tr.tracks[j].totalSins - 1], 0.0f, Sinusoid.NON_EXISTING_FRAME_INDEX); tr.tracks[j].add( sinSignal.framesSins[i].time + ZERO_AMP_SHIFT_IN_SECONDS, zeroAmpSin, sinSignal.framesSins[i].maxFreqOfVoicing, SinusoidalTrack.TURNED_OFF); } } } } // } } for (i = 0; i <= tr.currentIndex; i++) tr.tracks[i].correctTrack(); tr.setOriginalDurationManual(sinSignal.originalDurationInSeconds); SinusoidalTracks trOut = new SinusoidalTracks(tr, 0, tr.currentIndex); trOut = postProcess(trOut); return trOut; }