294 lines
11 KiB
C

/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* lattice.c
*
* Contains the normalized lattice filter routines (MA and AR) for iSAC codec
*
*/
#include "codec.h"
#include "settings.h"
/* filter the signal using normalized lattice filter */
/* MA filter */
void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
WebRtc_Word32 *stateGQ15,
WebRtc_Word16 *lat_inQ0,
WebRtc_Word16 *filt_coefQ15,
WebRtc_Word32 *gain_lo_hiQ17,
WebRtc_Word16 lo_hi,
WebRtc_Word16 *lat_outQ9)
{
WebRtc_Word16 sthQ15[MAX_AR_MODEL_ORDER];
WebRtc_Word16 cthQ15[MAX_AR_MODEL_ORDER];
int u, i, ii, k, n;
WebRtc_Word16 temp2,temp3;
WebRtc_Word16 ord_1 = orderCoef+1;
WebRtc_Word32 inv_cthQ16[MAX_AR_MODEL_ORDER];
WebRtc_Word32 gain32, fQtmp;
WebRtc_Word16 gain16;
WebRtc_Word16 gain_sh;
WebRtc_Word32 tmp32, tmp32b;
WebRtc_Word32 fQ15vec[HALF_SUBFRAMELEN];
WebRtc_Word32 gQ15[MAX_AR_MODEL_ORDER+1][HALF_SUBFRAMELEN];
WebRtc_Word16 sh;
WebRtc_Word16 t16a;
WebRtc_Word16 t16b;
#define LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32) \
((WebRtc_Word32)(WEBRTC_SPL_MUL(a32a, b32) + (WEBRTC_SPL_MUL_16_32_RSFT16(a32b, b32))))
/* This macro is FORBIDDEN to use elsewhere than in two places in this file
since it might give unpredictable results, since a general WebRtc_Word32*WebRtc_Word32
multiplication results in a 64 bit value. The result is then shifted just
16 steps to the right, giving need for 48 bits, i.e. in the generel case,
it will NOT fit in a WebRtc_Word32. In the cases used in here, the WebRtc_Word32 will be
enough, since (FOR SOME REASON!!!) the involved multiplicands aren't big
enough to overflow a WebRtc_Word32 after shifting right 16 bits. I have compared
the result of a multiplication between t32 and tmp32, done in two ways:
1) Using (WebRtc_Word32) (((float)(tmp32))*((float)(tmp32b))/65536.0);
2) Using LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
By running 25 files, I haven't found any bigger diff than 64 - this was in the
case when method 1) gave 650235648 and 2) gave 650235712.
It might be good to investigate this further, in order to PROVE why it seems to
work without any problems. This might be done, by using the properties of
all reflection coefficients etc.
*/
for (u=0;u<SUBFRAMES;u++)
{
/* set the Direct Form coefficients */
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u)+lo_hi;
/* compute lattice filter coefficients */
for (ii=0; ii<orderCoef; ii++) {
sthQ15[ii] = filt_coefQ15[temp2+ii];
}
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
/* compute the gain */
gain32 = gain_lo_hiQ17[temp3];
gain_sh = WebRtcSpl_NormW32(gain32);
gain32 = WEBRTC_SPL_LSHIFT_W32(gain32, gain_sh); //Q(17+gain_sh)
for (k=0;k<orderCoef;k++)
{
gain32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gain32); //Q15*Q(17+gain_sh)>>15 = Q(17+gain_sh)
inv_cthQ16[k] = WebRtcSpl_DivW32W16((WebRtc_Word32)2147483647, cthQ15[k]); // 1/cth[k] in Q31/Q15 = Q16
}
gain16 = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(gain32, 16); //Q(1+gain_sh)
/* normalized lattice filter */
/*****************************/
/* initial conditions */
for (i=0;i<HALF_SUBFRAMELEN;i++)
{
fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 15); //Q15
gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 15); //Q15
}
fQtmp = fQ15vec[0];
/* get the state of f&g for the first input, for all orders */
for (i=1;i<ord_1;i++)
{
// Calculate f[i][0] = inv_cth[i-1]*(f[i-1][0] + sth[i-1]*stateG[i-1]);
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], stateGQ15[i-1]);//Q15*Q15>>15 = Q15
tmp32b= fQtmp + tmp32; //Q15+Q15=Q15
tmp32 = inv_cthQ16[i-1]; //Q16
t16a = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
t16b = (WebRtc_Word16) (tmp32-WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)t16a), 16));
if (t16b<0) t16a++;
tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
fQtmp = tmp32; // Q15
// Calculate g[i][0] = cth[i-1]*stateG[i-1] + sth[i-1]* f[i][0];
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[i-1], stateGQ15[i-1]); //Q15*Q15>>15 = Q15
tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], fQtmp); //Q15*Q15>>15 = Q15
tmp32 = tmp32 + tmp32b;//Q15+Q15 = Q15
gQ15[i][0] = tmp32; // Q15
}
/* filtering */
/* save the states */
for(k=0;k<orderCoef;k++)
{
for(n=0;n<HALF_SUBFRAMELEN-1;n++)
{
// Calculate f[k+1][n+1] = inv_cth[k]*(f[k][n+1] + sth[k]*g[k][n]);
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[k], gQ15[k][n]);//Q15*Q15>>15 = Q15
tmp32b= fQ15vec[n+1] + tmp32; //Q15+Q15=Q15
tmp32 = inv_cthQ16[k]; //Q16
t16a = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
t16b = (WebRtc_Word16) (tmp32-WEBRTC_SPL_LSHIFT_W32(((WebRtc_Word32)t16a), 16));
if (t16b<0) t16a++;
tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
fQ15vec[n+1] = tmp32; // Q15
// Calculate g[k+1][n+1] = cth[k]*g[k][n] + sth[k]* f[k+1][n+1];
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gQ15[k][n]); //Q15*Q15>>15 = Q15
tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[k], fQ15vec[n+1]); //Q15*Q15>>15 = Q15
tmp32 = tmp32 + tmp32b;//Q15+Q15 = Q15
gQ15[k+1][n+1] = tmp32; // Q15
}
}
fQ15vec[0] = fQtmp;
for(n=0;n<HALF_SUBFRAMELEN;n++)
{
//gain32 = WEBRTC_SPL_RSHIFT_W32(gain32, gain_sh); // Q(17+gain_sh) -> Q17
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
sh = 9-gain_sh; //number of needed shifts to reach Q9
t16a = (WebRtc_Word16) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
lat_outQ9[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = t16a;
}
/* save the states */
for (i=0;i<ord_1;i++)
{
stateGQ15[i] = gQ15[i][HALF_SUBFRAMELEN-1];
}
//process next frame
}
return;
}
/* ----------------AR filter-------------------------*/
/* filter the signal using normalized lattice filter */
void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
WebRtc_Word16 *stateGQ0,
WebRtc_Word32 *lat_inQ25,
WebRtc_Word16 *filt_coefQ15,
WebRtc_Word32 *gain_lo_hiQ17,
WebRtc_Word16 lo_hi,
WebRtc_Word16 *lat_outQ0)
{
int ii,n,k,i,u;
WebRtc_Word16 sthQ15[MAX_AR_MODEL_ORDER];
WebRtc_Word16 cthQ15[MAX_AR_MODEL_ORDER];
WebRtc_Word32 tmp32, tmp32_2;
WebRtc_Word16 tmpAR;
WebRtc_Word16 ARfQ0vec[HALF_SUBFRAMELEN];
WebRtc_Word16 ARgQ0vec[MAX_AR_MODEL_ORDER+1];
WebRtc_Word32 inv_gain32;
WebRtc_Word16 inv_gain16;
WebRtc_Word16 den16;
WebRtc_Word16 sh;
WebRtc_Word16 temp2,temp3;
WebRtc_Word16 ord_1 = orderCoef+1;
for (u=0;u<SUBFRAMES;u++)
{
//set the denominator and numerator of the Direct Form
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u) + lo_hi;
for (ii=0; ii<orderCoef; ii++) {
sthQ15[ii] = filt_coefQ15[temp2+ii];
}
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
/* Simulation of the 25 files shows that maximum value in
the vector gain_lo_hiQ17[] is 441344, which means that
it is log2((2^31)/441344) = 12.2 shifting bits from
saturation. Therefore, it should be safe to use Q27 instead
of Q17. */
tmp32 = WEBRTC_SPL_LSHIFT_W32(gain_lo_hiQ17[temp3], 10); // Q27
for (k=0;k<orderCoef;k++) {
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], tmp32); // Q15*Q27>>15 = Q27
}
sh = WebRtcSpl_NormW32(tmp32); // tmp32 is the gain
den16 = (WebRtc_Word16) WEBRTC_SPL_SHIFT_W32(tmp32, sh-16); //Q(27+sh-16) = Q(sh+11) (all 16 bits are value bits)
inv_gain32 = WebRtcSpl_DivW32W16((WebRtc_Word32)2147483647, den16); // 1/gain in Q31/Q(sh+11) = Q(20-sh)
//initial conditions
inv_gain16 = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(inv_gain32, 2); // 1/gain in Q(20-sh-2) = Q(18-sh)
for (i=0;i<HALF_SUBFRAMELEN;i++)
{
tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 1); //Q25->Q26
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
ARfQ0vec[i] = (WebRtc_Word16) WEBRTC_SPL_SAT(32767, tmp32, -32768); // Q0
}
for (i=orderCoef-1;i>=0;i--) //get the state of f&g for the first input, for all orders
{
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[i],ARfQ0vec[0])) - (WEBRTC_SPL_MUL_16_16(sthQ15[i],stateGQ0[i])) + 16384), 15);
tmpAR = (WebRtc_Word16) WEBRTC_SPL_SAT(32767, tmp32, -32768); // Q0
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[i],ARfQ0vec[0])) + (WEBRTC_SPL_MUL_16_16(cthQ15[i], stateGQ0[i])) + 16384), 15);
ARgQ0vec[i+1] = (WebRtc_Word16) WEBRTC_SPL_SAT(32767, tmp32, -32768); // Q0
ARfQ0vec[0] = tmpAR;
}
ARgQ0vec[0] = ARfQ0vec[0];
for(n=0;n<HALF_SUBFRAMELEN-1;n++)
{
tmpAR = ARfQ0vec[n+1];
for(k=orderCoef-1;k>=0;k--)
{
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[k], tmpAR)) - (WEBRTC_SPL_MUL_16_16(sthQ15[k], ARgQ0vec[k])) + 16384), 15);
tmp32_2 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[k], tmpAR)) + (WEBRTC_SPL_MUL_16_16(cthQ15[k], ARgQ0vec[k])) + 16384), 15);
tmpAR = (WebRtc_Word16) WEBRTC_SPL_SAT(32767, tmp32, -32768); // Q0
ARgQ0vec[k+1] = (WebRtc_Word16) WEBRTC_SPL_SAT(32767, tmp32_2, -32768); // Q0
}
ARfQ0vec[n+1] = tmpAR;
ARgQ0vec[0] = tmpAR;
}
for(n=0;n<HALF_SUBFRAMELEN;n++)
{
lat_outQ0[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = ARfQ0vec[n];
}
/* cannot use memcpy in the following */
for (i=0;i<ord_1;i++)
{
stateGQ0[i] = ARgQ0vec[i];
}
}
return;
}