/** * Encode the given input signal. * * @param bits - Speex bits buffer. * @param in - the raw mono audio frame to encode. * @return 1 if successful. */ public int encode(final Bits bits, final float[] in) { int i; float[] mem, innov, syn_resp; float[] low_pi_gain, low_exc, low_innov; int dtx; /* Compute the two sub-bands by filtering with h0 and h1*/ Filters.qmf_decomp(in, h0, x0d, x1d, fullFrameSize, QMF_ORDER, h0_mem); /* Encode the narrowband part*/ lowenc.encode(bits, x0d); /* High-band buffering / sync with low band */ for (i = 0; i < windowSize - frameSize; i++) high[i] = high[frameSize + i]; for (i = 0; i < frameSize; i++) high[windowSize - frameSize + i] = x1d[i]; System.arraycopy(excBuf, frameSize, excBuf, 0, bufSize - frameSize); low_pi_gain = lowenc.getPiGain(); low_exc = lowenc.getExc(); low_innov = lowenc.getInnov(); int low_mode = lowenc.getMode(); if (low_mode == 0) dtx = 1; else dtx = 0; /* Start encoding the high-band */ for (i = 0; i < windowSize; i++) buf[i] = high[i] * window[i]; /* Compute auto-correlation */ Lpc.autocorr(buf, autocorr, lpcSize + 1, windowSize); autocorr[0] += 1; /* prevents NANs */ autocorr[0] *= lpc_floor; /* Noise floor in auto-correlation domain */ /* Lag windowing: equivalent to filtering in the power-spectrum domain */ for (i = 0; i < lpcSize + 1; i++) autocorr[i] *= lagWindow[i]; /* Levinson-Durbin */ Lpc.wld(lpc, autocorr, rc, lpcSize); // tmperr System.arraycopy(lpc, 0, lpc, 1, lpcSize); lpc[0] = 1; /* LPC to LSPs (x-domain) transform */ int roots = Lsp.lpc2lsp(lpc, lpcSize, lsp, 15, 0.2f); if (roots != lpcSize) { roots = Lsp.lpc2lsp(lpc, lpcSize, lsp, 11, 0.02f); if (roots != lpcSize) { /*If we can't find all LSP's, do some damage control and use a flat filter*/ for (i = 0; i < lpcSize; i++) { lsp[i] = (float) Math.cos(Math.PI * ((float) (i + 1)) / (lpcSize + 1)); } } } /* x-domain to angle domain*/ for (i = 0; i < lpcSize; i++) lsp[i] = (float) Math.acos(lsp[i]); float lsp_dist = 0; for (i = 0; i < lpcSize; i++) lsp_dist += (old_lsp[i] - lsp[i]) * (old_lsp[i] - lsp[i]); /*VBR stuff*/ if ((vbr_enabled != 0 || vad_enabled != 0) && dtx == 0) { float e_low = 0, e_high = 0; float ratio; if (abr_enabled != 0) { float qual_change = 0; if (abr_drift2 * abr_drift > 0) { /* Only adapt if long-term and short-term drift are the same sign */ qual_change = -.00001f * abr_drift / (1 + abr_count); if (qual_change > .1f) qual_change = .1f; if (qual_change < -.1f) qual_change = -.1f; } vbr_quality += qual_change; if (vbr_quality > 10) vbr_quality = 10; if (vbr_quality < 0) vbr_quality = 0; } for (i = 0; i < frameSize; i++) { e_low += x0d[i] * x0d[i]; e_high += high[i] * high[i]; } ratio = (float) Math.log((1 + e_high) / (1 + e_low)); relative_quality = lowenc.getRelativeQuality(); if (ratio < -4) ratio = -4; if (ratio > 2) ratio = 2; /*if (ratio>-2)*/ if (vbr_enabled != 0) { int modeid; modeid = nb_modes - 1; relative_quality += 1.0 * (ratio + 2); if (relative_quality < -1) { relative_quality = -1; } while (modeid != 0) { int v1; float thresh; v1 = (int) Math.floor(vbr_quality); if (v1 == 10) thresh = Vbr.hb_thresh[modeid][v1]; else thresh = (vbr_quality - v1) * Vbr.hb_thresh[modeid][v1 + 1] + (1 + v1 - vbr_quality) * Vbr.hb_thresh[modeid][v1]; if (relative_quality >= thresh) break; modeid--; } setMode(modeid); if (abr_enabled != 0) { int bitrate; bitrate = getBitRate(); abr_drift += (bitrate - abr_enabled); abr_drift2 = .95f * abr_drift2 + .05f * (bitrate - abr_enabled); abr_count += 1.0; } } else { /* VAD only */ int modeid; if (relative_quality < 2.0) modeid = 1; else modeid = submodeSelect; /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/ submodeID = modeid; } /*fprintf (stderr, "%f %f\n", ratio, low_qual);*/ } bits.pack(1, 1); if (dtx != 0) bits.pack(0, SB_SUBMODE_BITS); else bits.pack(submodeID, SB_SUBMODE_BITS); /* If null mode (no transmission), just set a couple things to zero*/ if (dtx != 0 || submodes[submodeID] == null) { for (i = 0; i < frameSize; i++) excBuf[excIdx + i] = swBuf[i] = VERY_SMALL; for (i = 0; i < lpcSize; i++) mem_sw[i] = 0; first = 1; /* Final signal synthesis from excitation */ Filters.iir_mem2(excBuf, excIdx, interp_qlpc, high, 0, subframeSize, lpcSize, mem_sp); /* Reconstruct the original */ filters.fir_mem_up(x0d, h0, y0, fullFrameSize, QMF_ORDER, g0_mem); filters.fir_mem_up(high, h1, y1, fullFrameSize, QMF_ORDER, g1_mem); for (i = 0; i < fullFrameSize; i++) in[i] = 2 * (y0[i] - y1[i]); if (dtx != 0) return 0; else return 1; } /* LSP quantization */ submodes[submodeID].lsqQuant.quant(lsp, qlsp, lpcSize, bits); if (first != 0) { for (i = 0; i < lpcSize; i++) old_lsp[i] = lsp[i]; for (i = 0; i < lpcSize; i++) old_qlsp[i] = qlsp[i]; } mem = new float[lpcSize]; syn_resp = new float[subframeSize]; innov = new float[subframeSize]; for (int sub = 0; sub < nbSubframes; sub++) { float tmp, filter_ratio; int exc, sp, sw, resp; int offset; float rl, rh, eh = 0, el = 0; int fold; offset = subframeSize * sub; sp = offset; exc = excIdx + offset; resp = offset; sw = offset; /* LSP interpolation (quantized and unquantized) */ tmp = (1.0f + sub) / nbSubframes; for (i = 0; i < lpcSize; i++) interp_lsp[i] = (1 - tmp) * old_lsp[i] + tmp * lsp[i]; for (i = 0; i < lpcSize; i++) interp_qlsp[i] = (1 - tmp) * old_qlsp[i] + tmp * qlsp[i]; Lsp.enforce_margin(interp_lsp, lpcSize, .05f); Lsp.enforce_margin(interp_qlsp, lpcSize, .05f); /* Compute interpolated LPCs (quantized and unquantized) */ for (i = 0; i < lpcSize; i++) interp_lsp[i] = (float) Math.cos(interp_lsp[i]); for (i = 0; i < lpcSize; i++) interp_qlsp[i] = (float) Math.cos(interp_qlsp[i]); m_lsp.lsp2lpc(interp_lsp, interp_lpc, lpcSize); m_lsp.lsp2lpc(interp_qlsp, interp_qlpc, lpcSize); Filters.bw_lpc(gamma1, interp_lpc, bw_lpc1, lpcSize); Filters.bw_lpc(gamma2, interp_lpc, bw_lpc2, lpcSize); /* Compute mid-band (4000 Hz for wideband) response of low-band and high-band filters */ rl = rh = 0; tmp = 1; pi_gain[sub] = 0; for (i = 0; i <= lpcSize; i++) { rh += tmp * interp_qlpc[i]; tmp = -tmp; pi_gain[sub] += interp_qlpc[i]; } rl = low_pi_gain[sub]; rl = 1 / (Math.abs(rl) + .01f); rh = 1 / (Math.abs(rh) + .01f); /* Compute ratio, will help predict the gain */ filter_ratio = Math.abs(.01f + rh) / (.01f + Math.abs(rl)); fold = filter_ratio < 5 ? 1 : 0; /*printf ("filter_ratio %f\n", filter_ratio);*/ fold = 0; /* Compute "real excitation" */ Filters.fir_mem2(high, sp, interp_qlpc, excBuf, exc, subframeSize, lpcSize, mem_sp2); /* Compute energy of low-band and high-band excitation */ for (i = 0; i < subframeSize; i++) eh += excBuf[exc + i] * excBuf[exc + i]; if (submodes[submodeID].innovation == null) { /* 1 for spectral folding excitation, 0 for stochastic */ float g; /*speex_bits_pack(bits, 1, 1);*/ for (i = 0; i < subframeSize; i++) el += low_innov[offset + i] * low_innov[offset + i]; /* Gain to use if we want to use the low-band excitation for high-band */ g = eh / (.01f + el); g = (float) Math.sqrt(g); g *= filter_ratio; /*print_vec(&g, 1, "gain factor");*/ /* Gain quantization */ { int quant = (int) Math.floor(.5 + 10 + 8.0 * Math.log((g + .0001))); /*speex_warning_int("tata", quant);*/ if (quant < 0) quant = 0; if (quant > 31) quant = 31; bits.pack(quant, 5); g = (float) (.1 * Math.exp(quant / 9.4)); } /*printf ("folding gain: %f\n", g);*/ g /= filter_ratio; } else { float gc, scale, scale_1; for (i = 0; i < subframeSize; i++) el += low_exc[offset + i] * low_exc[offset + i]; /*speex_bits_pack(bits, 0, 1);*/ gc = (float) (Math.sqrt(1 + eh) * filter_ratio / Math.sqrt((1 + el) * subframeSize)); { int qgc = (int) Math.floor(.5 + 3.7 * (Math.log(gc) + 2)); if (qgc < 0) qgc = 0; if (qgc > 15) qgc = 15; bits.pack(qgc, 4); gc = (float) Math.exp((1 / 3.7) * qgc - 2); } scale = gc * (float) Math.sqrt(1 + el) / filter_ratio; scale_1 = 1 / scale; for (i = 0; i < subframeSize; i++) excBuf[exc + i] = 0; excBuf[exc] = 1; Filters.syn_percep_zero( excBuf, exc, interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, subframeSize, lpcSize); /* Reset excitation */ for (i = 0; i < subframeSize; i++) excBuf[exc + i] = 0; /* Compute zero response (ringing) of A(z/g1) / ( A(z/g2) * Aq(z) ) */ for (i = 0; i < lpcSize; i++) mem[i] = mem_sp[i]; Filters.iir_mem2(excBuf, exc, interp_qlpc, excBuf, exc, subframeSize, lpcSize, mem); for (i = 0; i < lpcSize; i++) mem[i] = mem_sw[i]; Filters.filter_mem2( excBuf, exc, bw_lpc1, bw_lpc2, res, resp, subframeSize, lpcSize, mem, 0); /* Compute weighted signal */ for (i = 0; i < lpcSize; i++) mem[i] = mem_sw[i]; Filters.filter_mem2(high, sp, bw_lpc1, bw_lpc2, swBuf, sw, subframeSize, lpcSize, mem, 0); /* Compute target signal */ for (i = 0; i < subframeSize; i++) target[i] = swBuf[sw + i] - res[resp + i]; for (i = 0; i < subframeSize; i++) excBuf[exc + i] = 0; for (i = 0; i < subframeSize; i++) target[i] *= scale_1; /* Reset excitation */ for (i = 0; i < subframeSize; i++) innov[i] = 0; /*print_vec(target, st->subframeSize, "\ntarget");*/ submodes[submodeID].innovation.quant( target, interp_qlpc, bw_lpc1, bw_lpc2, lpcSize, subframeSize, innov, 0, syn_resp, bits, (complexity + 1) >> 1); /*print_vec(target, st->subframeSize, "after");*/ for (i = 0; i < subframeSize; i++) excBuf[exc + i] += innov[i] * scale; if (submodes[submodeID].double_codebook != 0) { float[] innov2 = new float[subframeSize]; for (i = 0; i < subframeSize; i++) innov2[i] = 0; for (i = 0; i < subframeSize; i++) target[i] *= 2.5; submodes[submodeID].innovation.quant( target, interp_qlpc, bw_lpc1, bw_lpc2, lpcSize, subframeSize, innov2, 0, syn_resp, bits, (complexity + 1) >> 1); for (i = 0; i < subframeSize; i++) innov2[i] *= scale * (1 / 2.5); for (i = 0; i < subframeSize; i++) excBuf[exc + i] += innov2[i]; } } /*Keep the previous memory*/ for (i = 0; i < lpcSize; i++) mem[i] = mem_sp[i]; /* Final signal synthesis from excitation */ Filters.iir_mem2(excBuf, exc, interp_qlpc, high, sp, subframeSize, lpcSize, mem_sp); /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */ Filters.filter_mem2(high, sp, bw_lpc1, bw_lpc2, swBuf, sw, subframeSize, lpcSize, mem_sw, 0); } // #ifndef RELEASE /* Reconstruct the original */ filters.fir_mem_up(x0d, h0, y0, fullFrameSize, QMF_ORDER, g0_mem); filters.fir_mem_up(high, h1, y1, fullFrameSize, QMF_ORDER, g1_mem); for (i = 0; i < fullFrameSize; i++) in[i] = 2 * (y0[i] - y1[i]); // #endif for (i = 0; i < lpcSize; i++) old_lsp[i] = lsp[i]; for (i = 0; i < lpcSize; i++) old_qlsp[i] = qlsp[i]; first = 0; return 1; }