Optimized an AR function in iSAC fix for ARMv7 (not Neon) platforms.
Bit exact. Speed doubled. Review URL: http://webrtc-codereview.appspot.com/327001 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1392 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
04c18cb37a
commit
badf2b8044
@ -45,6 +45,14 @@ LOCAL_SRC_FILES := \
|
||||
spectrum_ar_model_tables.c \
|
||||
transform.c
|
||||
|
||||
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
||||
LOCAL_SRC_FILES += \
|
||||
lattice_armv7.S
|
||||
else
|
||||
LOCAL_SRC_FILES += \
|
||||
lattice_c.c
|
||||
endif
|
||||
|
||||
# Flags passed to both C and C++ files.
|
||||
LOCAL_CFLAGS := \
|
||||
$(MY_WEBRTC_COMMON_DEFS)
|
||||
@ -88,7 +96,7 @@ LOCAL_CFLAGS := \
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(LOCAL_PATH)/../interface \
|
||||
$(LOCAL_PATH)/../../../../../.. \
|
||||
$(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include
|
||||
$(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include
|
||||
|
||||
|
||||
ifndef NDK_ROOT
|
||||
|
@ -35,6 +35,16 @@
|
||||
case when method 1) gave 650235648 and 2) gave 650235712.
|
||||
*/
|
||||
|
||||
/* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter
|
||||
with coefficients cth_Q15[] and sth_Q15[].
|
||||
Implemented for both generic and ARMv7 platforms.
|
||||
*/
|
||||
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,
|
||||
int16_t* ar_f_Q0,
|
||||
int16_t* cth_Q15,
|
||||
int16_t* sth_Q15,
|
||||
int16_t order_coef);
|
||||
|
||||
/* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa().
|
||||
It does:
|
||||
for 0 <= n < HALF_SUBFRAMELEN - 1:
|
||||
@ -107,14 +117,14 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
||||
|
||||
for (u=0;u<SUBFRAMES;u++)
|
||||
{
|
||||
int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN);
|
||||
|
||||
/* set the Direct Form coefficients */
|
||||
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
|
||||
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u)+lo_hi;
|
||||
|
||||
/* compute lattice filter coefficients */
|
||||
for (ii=0; ii<orderCoef; ii++) {
|
||||
sthQ15[ii] = filt_coefQ15[temp2+ii];
|
||||
}
|
||||
memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(WebRtc_Word16));
|
||||
|
||||
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
|
||||
|
||||
@ -136,8 +146,8 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
||||
/* initial conditions */
|
||||
for (i=0;i<HALF_SUBFRAMELEN;i++)
|
||||
{
|
||||
fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 15); //Q15
|
||||
gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 15); //Q15
|
||||
fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + temp1], 15); //Q15
|
||||
gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)lat_inQ0[i + temp1], 15); //Q15
|
||||
}
|
||||
|
||||
|
||||
@ -182,7 +192,7 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
||||
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
|
||||
sh = 9-gain_sh; //number of needed shifts to reach Q9
|
||||
t16a = (WebRtc_Word16) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
|
||||
lat_outQ9[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = t16a;
|
||||
lat_outQ9[n + temp1] = t16a;
|
||||
}
|
||||
|
||||
/* save the states */
|
||||
@ -230,6 +240,8 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
||||
|
||||
for (u=0;u<SUBFRAMES;u++)
|
||||
{
|
||||
int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN);
|
||||
|
||||
//set the denominator and numerator of the Direct Form
|
||||
temp2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(u, orderCoef);
|
||||
temp3 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(2, u) + lo_hi;
|
||||
@ -262,7 +274,7 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
||||
for (i=0;i<HALF_SUBFRAMELEN;i++)
|
||||
{
|
||||
|
||||
tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)], 1); //Q25->Q26
|
||||
tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + temp1], 1); //Q25->Q26
|
||||
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
|
||||
tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
|
||||
|
||||
@ -280,23 +292,12 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
||||
}
|
||||
ARgQ0vec[0] = ARfQ0vec[0];
|
||||
|
||||
for(n=0;n<HALF_SUBFRAMELEN-1;n++)
|
||||
{
|
||||
tmpAR = ARfQ0vec[n+1];
|
||||
for(k=orderCoef-1;k>=0;k--)
|
||||
{
|
||||
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[k], tmpAR)) - (WEBRTC_SPL_MUL_16_16(sthQ15[k], ARgQ0vec[k])) + 16384), 15);
|
||||
tmp32_2 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[k], tmpAR)) + (WEBRTC_SPL_MUL_16_16(cthQ15[k], ARgQ0vec[k])) + 16384), 15);
|
||||
tmpAR = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||
ARgQ0vec[k+1] = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32_2); // Q0
|
||||
}
|
||||
ARfQ0vec[n+1] = tmpAR;
|
||||
ARgQ0vec[0] = tmpAR;
|
||||
}
|
||||
// Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[].
|
||||
WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef);
|
||||
|
||||
for(n=0;n<HALF_SUBFRAMELEN;n++)
|
||||
{
|
||||
lat_outQ0[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = ARfQ0vec[n];
|
||||
lat_outQ0[n + temp1] = ARfQ0vec[n];
|
||||
}
|
||||
|
||||
|
||||
|
@ -0,0 +1,82 @@
|
||||
@
|
||||
@ Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
@
|
||||
|
||||
@ Contains a function for the core loop in the normalized lattice AR
|
||||
@ filter routine for iSAC codec, optimized for ARMv7 platforms.
|
||||
@
|
||||
@ Output is bit-exact with the reference C code in lattice_c.c
|
||||
@
|
||||
@ Register usage:
|
||||
@
|
||||
@ r0: &ar_g_Q0
|
||||
@ r1: &ar_f_Q0
|
||||
@ r2: &cth_Q15
|
||||
@ r3: &sth_Q15
|
||||
@ r4: outer loop counter
|
||||
@ r5: tmpAR
|
||||
@ r9: inner loop counter
|
||||
@ r12: constant #16384
|
||||
@ r6, r7, r8, r10, r11: scratch
|
||||
|
||||
#include "settings.h"
|
||||
|
||||
.arch armv7-a
|
||||
.global WebRtcIsacfix_FilterArLoop
|
||||
.align 2
|
||||
|
||||
@ WebRtcIsacfix_FilterArLoop(ar_g_Q0, ar_f_Q0, cth_Q15, sth_Q15, order_coef)
@
@ Runs the AR lattice recursion in place: for each of the HALF_SUBFRAMELEN - 1
@ samples ar_f_Q0[1..], the inner loop walks the stages k = order_coef-1 .. 0,
@ updating tmpAR and ar_g_Q0[k+1]; the final tmpAR is then written to both
@ ar_g_Q0[0] and ar_f_Q0[n+1].
@
@ The four pointers arrive in r0-r3; order_coef is read from the stack.
WebRtcIsacfix_FilterArLoop:
|
||||
.fnstart
|
||||
|
||||
.save {r4-r11}
|
||||
@ Eight registers (32 bytes) are pushed, which is why order_coef is read
@ from [sp, #32] below.
push {r4-r11}
|
||||
|
||||
add r1, #2 @ &ar_f_Q0[1]
|
||||
@ 16384 = 1 << 14: rounding term added to each sum before the asr #15.
mov r12, #16384
|
||||
mov r4, #HALF_SUBFRAMELEN
|
||||
sub r4, #1 @ Outer loop counter = HALF_SUBFRAMELEN - 1
|
||||
|
||||
HALF_SUBFRAME_LOOP: @ for(n = 0; n < HALF_SUBFRAMELEN - 1; n++)
|
||||
|
||||
ldr r9, [sp, #32] @ Restore the inner loop counter to order_coef
|
||||
ldrh r5, [r1] @ tmpAR = ar_f_Q0[n+1]
|
||||
@ r0, r2 and r3 are at their array bases here (the inner loop steps each one
@ back by one halfword per iteration), so adding 2 * order_coef points them
@ one past the last stage again.
add r0, r9, asl #1 @ Restore r0 to &ar_g_Q0[order_coef]
|
||||
add r2, r9, asl #1 @ Restore r2 to &cth_Q15[order_coef]
|
||||
add r3, r9, asl #1 @ Restore r3 to &sth_Q15[order_coef]
|
||||
|
||||
ORDER_COEF_LOOP: @ for(k = order_coef - 1 ; k >= 0; k--)
|
||||
|
||||
@ Pre-indexed loads with writeback: r3 and r2 are decremented first, then read.
ldrh r7, [r3, #-2]! @ sth_Q15[k]
|
||||
ldrh r6, [r2, #-2]! @ cth_Q15[k]
|
||||
|
||||
@ r0 is not modified here; it still points at ar_g_Q0[k+1] for the store below.
ldrh r8, [r0, #-2] @ ar_g_Q0[k]
|
||||
smlabb r11, r7, r5, r12 @ sth_Q15[k] * tmpAR + 16384
|
||||
smlabb r10, r6, r5, r12 @ cth_Q15[k] * tmpAR + 16384
|
||||
smulbb r7, r7, r8 @ sth_Q15[k] * ar_g_Q0[k]
|
||||
smlabb r11, r6, r8, r11 @ cth_Q15[k]*ar_g_Q0[k]+(sth_Q15[k]*tmpAR+16384)
|
||||
|
||||
sub r10, r10, r7 @ cth_Q15[k]*tmpAR+16384-(sth_Q15[k]*ar_g_Q0[k])
|
||||
@ Shift the rounded sums right by 15 and saturate to signed 16 bits:
@ r11 becomes the new ar_g_Q0[k+1], r5 becomes the new tmpAR.
ssat r11, #16, r11, asr #15
|
||||
ssat r5, #16, r10, asr #15
|
||||
@ Post-indexed store: write at r0, then step r0 back one halfword.
strh r11, [r0], #-2 @ Output: ar_g_Q0[k+1]
|
||||
|
||||
@ The loop test is at the bottom, so the body above runs at least once.
@ NOTE(review): presumably callers always pass order_coef >= 1 - confirm.
subs r9, #1
|
||||
bgt ORDER_COEF_LOOP
|
||||
|
||||
strh r5, [r0] @ Output: ar_g_Q0[0] = tmpAR;
|
||||
strh r5, [r1], #2 @ Output: ar_f_Q0[n+1] = tmpAR;
|
||||
|
||||
subs r4, #1
|
||||
bne HALF_SUBFRAME_LOOP
|
||||
|
||||
pop {r4-r11}
|
||||
bx lr
|
||||
|
||||
.fnend
|
||||
|
49
src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c
Normal file
49
src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Contains the core loop function for the lattice filter AR routine
|
||||
* for iSAC codec.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "settings.h"
|
||||
#include "signal_processing_library.h"
|
||||
#include "typedefs.h"
|
||||
|
||||
/* Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
|
||||
* cth_Q15[] and sth_Q15[].
|
||||
*/
|
||||
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, // Input samples
|
||||
int16_t* ar_f_Q0, // Input samples
|
||||
int16_t* cth_Q15, // Filter coefficients
|
||||
int16_t* sth_Q15, // Filter coefficients
|
||||
int16_t order_coef) { // order of the filter
|
||||
int n = 0;
|
||||
|
||||
for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
|
||||
int k = 0;
|
||||
int16_t tmpAR = 0;
|
||||
int32_t tmp32 = 0;
|
||||
int32_t tmp32_2 = 0;
|
||||
|
||||
tmpAR = ar_f_Q0[n + 1];
|
||||
for (k = order_coef - 1; k >= 0; k--) {
|
||||
tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cth_Q15[k], tmpAR))
|
||||
- (WEBRTC_SPL_MUL_16_16(sth_Q15[k], ar_g_Q0[k])) + 16384), 15);
|
||||
tmp32_2 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sth_Q15[k], tmpAR))
|
||||
+ (WEBRTC_SPL_MUL_16_16(cth_Q15[k], ar_g_Q0[k])) + 16384), 15);
|
||||
tmpAR = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32);
|
||||
ar_g_Q0[k + 1] = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32_2);
|
||||
}
|
||||
ar_f_Q0[n + 1] = tmpAR;
|
||||
ar_g_Q0[0] = tmpAR;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user