Optimized an AR function in iSAC fix for ARMv7 (not Neon) platforms.
Bit exact. Speed doubled. Review URL: http://webrtc-codereview.appspot.com/327001 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1392 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
04c18cb37a
commit
badf2b8044
@ -45,6 +45,14 @@ LOCAL_SRC_FILES := \
|
|||||||
spectrum_ar_model_tables.c \
|
spectrum_ar_model_tables.c \
|
||||||
transform.c
|
transform.c
|
||||||
|
|
||||||
|
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
||||||
|
LOCAL_SRC_FILES += \
|
||||||
|
lattice_armv7.S
|
||||||
|
else
|
||||||
|
LOCAL_SRC_FILES += \
|
||||||
|
lattice_c.c
|
||||||
|
endif
|
||||||
|
|
||||||
# Flags passed to both C and C++ files.
|
# Flags passed to both C and C++ files.
|
||||||
LOCAL_CFLAGS := \
|
LOCAL_CFLAGS := \
|
||||||
$(MY_WEBRTC_COMMON_DEFS)
|
$(MY_WEBRTC_COMMON_DEFS)
|
||||||
|
@ -35,6 +35,16 @@
|
|||||||
case when method 1) gave 650235648 and 2) gave 650235712.
|
case when method 1) gave 650235648 and 2) gave 650235712.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter
|
||||||
|
with coefficients cth_Q15[] and sth_Q15[].
|
||||||
|
Implemented for both generic and ARMv7 platforms.
|
||||||
|
*/
|
||||||
|
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,
|
||||||
|
int16_t* ar_f_Q0,
|
||||||
|
int16_t* cth_Q15,
|
||||||
|
int16_t* sth_Q15,
|
||||||
|
int16_t order_coef);
|
||||||
|
|
||||||
/* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa().
|
/* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa().
|
||||||
It does:
|
It does:
|
||||||
for 0 <= n < HALF_SUBFRAMELEN - 1:
|
for 0 <= n < HALF_SUBFRAMELEN - 1:
|
||||||
@ -107,14 +117,14 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
|||||||
|
|
||||||
for (u=0;u<SUBFRAMES;u++)
|
for (u=0;u<SUBFRAMES;u++)
|
||||||
{
|
{
|
||||||
|
int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN);
|
||||||
|
|
||||||
/* set the Direct Form coefficients */
|
/* set the Direct Form coefficients */
|
||||||
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
|
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
|
||||||
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u)+lo_hi;
|
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u)+lo_hi;
|
||||||
|
|
||||||
/* compute lattice filter coefficients */
|
/* compute lattice filter coefficients */
|
||||||
for (ii=0; ii<orderCoef; ii++) {
|
memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(WebRtc_Word16));
|
||||||
sthQ15[ii] = filt_coefQ15[temp2+ii];
|
|
||||||
}
|
|
||||||
|
|
||||||
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
|
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
|
||||||
|
|
||||||
@ -136,8 +146,8 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
|||||||
/* initial conditions */
|
/* initial conditions */
|
||||||
for (i=0;i<HALF_SUBFRAMELEN;i++)
|
for (i=0;i<HALF_SUBFRAMELEN;i++)
|
||||||
{
|
{
|
||||||
fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 15); //Q15
|
fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + temp1], 15); //Q15
|
||||||
gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 15); //Q15
|
gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + temp1], 15); //Q15
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -182,7 +192,7 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
|||||||
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
|
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
|
||||||
sh = 9-gain_sh; //number of needed shifts to reach Q9
|
sh = 9-gain_sh; //number of needed shifts to reach Q9
|
||||||
t16a = (WebRtc_Word16) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
|
t16a = (WebRtc_Word16) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
|
||||||
lat_outQ9[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = t16a;
|
lat_outQ9[n + temp1] = t16a;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* save the states */
|
/* save the states */
|
||||||
@ -230,6 +240,8 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
|||||||
|
|
||||||
for (u=0;u<SUBFRAMES;u++)
|
for (u=0;u<SUBFRAMES;u++)
|
||||||
{
|
{
|
||||||
|
int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN);
|
||||||
|
|
||||||
//set the denominator and numerator of the Direct Form
|
//set the denominator and numerator of the Direct Form
|
||||||
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
|
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
|
||||||
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u) + lo_hi;
|
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u) + lo_hi;
|
||||||
@ -262,7 +274,7 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
|||||||
for (i=0;i<HALF_SUBFRAMELEN;i++)
|
for (i=0;i<HALF_SUBFRAMELEN;i++)
|
||||||
{
|
{
|
||||||
|
|
||||||
tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 1); //Q25->Q26
|
tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + temp1], 1); //Q25->Q26
|
||||||
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
|
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
|
||||||
tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
|
tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
|
||||||
|
|
||||||
@ -280,23 +292,12 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
|||||||
}
|
}
|
||||||
ARgQ0vec[0] = ARfQ0vec[0];
|
ARgQ0vec[0] = ARfQ0vec[0];
|
||||||
|
|
||||||
for(n=0;n<HALF_SUBFRAMELEN-1;n++)
|
// Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[].
|
||||||
{
|
WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef);
|
||||||
tmpAR = ARfQ0vec[n+1];
|
|
||||||
for(k=orderCoef-1;k>=0;k--)
|
|
||||||
{
|
|
||||||
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[k], tmpAR)) - (WEBRTC_SPL_MUL_16_16(sthQ15[k], ARgQ0vec[k])) + 16384), 15);
|
|
||||||
tmp32_2 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[k], tmpAR)) + (WEBRTC_SPL_MUL_16_16(cthQ15[k], ARgQ0vec[k])) + 16384), 15);
|
|
||||||
tmpAR = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
|
||||||
ARgQ0vec[k+1] = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32_2); // Q0
|
|
||||||
}
|
|
||||||
ARfQ0vec[n+1] = tmpAR;
|
|
||||||
ARgQ0vec[0] = tmpAR;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(n=0;n<HALF_SUBFRAMELEN;n++)
|
for(n=0;n<HALF_SUBFRAMELEN;n++)
|
||||||
{
|
{
|
||||||
lat_outQ0[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = ARfQ0vec[n];
|
lat_outQ0[n + temp1] = ARfQ0vec[n];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -0,0 +1,82 @@
|
|||||||
|
@
|
||||||
|
@ Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||||
|
@
|
||||||
|
@ Use of this source code is governed by a BSD-style license
|
||||||
|
@ that can be found in the LICENSE file in the root of the source
|
||||||
|
@ tree. An additional intellectual property rights grant can be found
|
||||||
|
@ in the file PATENTS. All contributing project authors may
|
||||||
|
@ be found in the AUTHORS file in the root of the source tree.
|
||||||
|
@
|
||||||
|
|
||||||
|
@ Contains a function for the core loop in the normalized lattice AR
|
||||||
|
@ filter routine for iSAC codec, optimized for ARMv7 platforms.
|
||||||
|
@
|
||||||
|
@ Output is bit-exact with the reference C code in lattice_c.c
|
||||||
|
@
|
||||||
|
@ Register usage:
|
||||||
|
@
|
||||||
|
@ r0: &ar_g_Q0
|
||||||
|
@ r1: &ar_f_Q0
|
||||||
|
@ r2: &cth_Q15
|
||||||
|
@ r3: &sth_Q15
|
||||||
|
@ r4: outer loop counter
|
||||||
|
@ r5: tmpAR
|
||||||
|
@ r9: inner loop counter
|
||||||
|
@ r12: constant #16384
|
||||||
|
@ r6, r7, r8, r10, r11: scratch
|
||||||
|
|
||||||
|
#include "settings.h"
|
||||||
|
|
||||||
|
.arch armv7-a
|
||||||
|
.global WebRtcIsacfix_FilterArLoop
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
@ void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, int16_t* ar_f_Q0,
@                                 int16_t* cth_Q15, int16_t* sth_Q15,
@                                 int16_t order_coef)
@ The four pointers are used from r0-r3; order_coef is read from the stack.
@ NOTE(review): the inner loop only tests its counter after the first pass,
@ so this routine appears to require order_coef >= 1 -- confirm with callers.
WebRtcIsacfix_FilterArLoop:
|
||||||
|
.fnstart
|
||||||
|
|
||||||
|
.save {r4-r11}
|
||||||
|
push {r4-r11} @ 8 registers = 32 bytes, so order_coef is read at [sp, #32] below
|
||||||
|
|
||||||
|
add r1, #2 @ &ar_f_Q0[1]
|
||||||
|
mov r12, #16384 @ Rounding term added before each ">> 15"
|
||||||
|
mov r4, #HALF_SUBFRAMELEN
|
||||||
|
sub r4, #1 @ Outer loop counter = HALF_SUBFRAMELEN - 1
|
||||||
|
|
||||||
|
HALF_SUBFRAME_LOOP: @ for(n = 0; n < HALF_SUBFRAMELEN - 1; n++)
|
||||||
|
|
||||||
|
ldr r9, [sp, #32] @ Restore the inner loop counter to order_coef
|
||||||
|
ldrh r5, [r1] @ tmpAR = ar_f_Q0[n+1]
|
||||||
|
add r0, r9, asl #1 @ Restore r0 to &ar_g_Q0[order_coef]
|
||||||
|
add r2, r9, asl #1 @ Restore r2 to &cth_Q15[order_coef]
|
||||||
|
add r3, r9, asl #1 @ Restore r3 to &sth_Q15[order_coef]
|
||||||
|
|
||||||
|
ORDER_COEF_LOOP: @ for(k = order_coef - 1 ; k >= 0; k--)
|
||||||
|
|
||||||
|
ldrh r7, [r3, #-2]! @ sth_Q15[k]
|
||||||
|
ldrh r6, [r2, #-2]! @ cth_Q15[k]
|
||||||
|
|
||||||
|
ldrh r8, [r0, #-2] @ ar_g_Q0[k]
|
||||||
|
smlabb r11, r7, r5, r12 @ sth_Q15[k] * tmpAR + 16384
|
||||||
|
smlabb r10, r6, r5, r12 @ cth_Q15[k] * tmpAR + 16384
|
||||||
|
smulbb r7, r7, r8 @ sth_Q15[k] * ar_g_Q0[k]
|
||||||
|
smlabb r11, r6, r8, r11 @ cth_Q15[k]*ar_g_Q0[k]+(sth_Q15[k]*tmpAR+16384)
|
||||||
|
|
||||||
|
sub r10, r10, r7 @ cth_Q15[k]*tmpAR+16384-(sth_Q15[k]*ar_g_Q0[k])
|
||||||
|
ssat r11, #16, r11, asr #15 @ New ar_g_Q0[k+1]: >> 15, then saturate to 16 bits
|
||||||
|
ssat r5, #16, r10, asr #15 @ New tmpAR: >> 15, then saturate to 16 bits
|
||||||
|
strh r11, [r0], #-2 @ Output: ar_g_Q0[k+1]
|
||||||
|
|
||||||
|
subs r9, #1
|
||||||
|
bgt ORDER_COEF_LOOP
|
||||||
|
|
||||||
|
strh r5, [r0] @ Output: ar_g_Q0[0] = tmpAR;
|
||||||
|
strh r5, [r1], #2 @ Output: ar_f_Q0[n+1] = tmpAR;
|
||||||
|
|
||||||
|
subs r4, #1
|
||||||
|
bne HALF_SUBFRAME_LOOP
|
||||||
|
|
||||||
|
pop {r4-r11}
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
|
49
src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c
Normal file
49
src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Contains the core loop function for the lattice filter AR routine
|
||||||
|
* for iSAC codec.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "settings.h"
|
||||||
|
#include "signal_processing_library.h"
|
||||||
|
#include "typedefs.h"
|
||||||
|
|
||||||
|
/* Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
|
||||||
|
* cth_Q15[] and sth_Q15[].
|
||||||
|
*/
|
||||||
|
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, // Input samples
|
||||||
|
int16_t* ar_f_Q0, // Input samples
|
||||||
|
int16_t* cth_Q15, // Filter coefficients
|
||||||
|
int16_t* sth_Q15, // Filter coefficients
|
||||||
|
int16_t order_coef) { // order of the filter
|
||||||
|
int n = 0;
|
||||||
|
|
||||||
|
for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
|
||||||
|
int k = 0;
|
||||||
|
int16_t tmpAR = 0;
|
||||||
|
int32_t tmp32 = 0;
|
||||||
|
int32_t tmp32_2 = 0;
|
||||||
|
|
||||||
|
tmpAR = ar_f_Q0[n + 1];
|
||||||
|
for (k = order_coef - 1; k >= 0; k--) {
|
||||||
|
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cth_Q15[k], tmpAR))
|
||||||
|
- (WEBRTC_SPL_MUL_16_16(sth_Q15[k], ar_g_Q0[k])) + 16384), 15);
|
||||||
|
tmp32_2 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sth_Q15[k], tmpAR))
|
||||||
|
+ (WEBRTC_SPL_MUL_16_16(cth_Q15[k], ar_g_Q0[k])) + 16384), 15);
|
||||||
|
tmpAR = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32);
|
||||||
|
ar_g_Q0[k + 1] = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32_2);
|
||||||
|
}
|
||||||
|
ar_f_Q0[n + 1] = tmpAR;
|
||||||
|
ar_g_Q0[0] = tmpAR;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user