From badf2b804413f3e70739519de276bdf320e9d3bf Mon Sep 17 00:00:00 2001 From: "kma@webrtc.org" Date: Wed, 11 Jan 2012 18:01:39 +0000 Subject: [PATCH] Optimized an AR function in iSAC fix for ARMv7 (not Neon) platforms. Bit exact. Speed doubled. Review URL: http://webrtc-codereview.appspot.com/327001 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1392 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/iSAC/fix/source/Android.mk | 10 ++- .../codecs/iSAC/fix/source/lattice.c | 43 +++++----- .../codecs/iSAC/fix/source/lattice_armv7.S | 82 +++++++++++++++++++ .../codecs/iSAC/fix/source/lattice_c.c | 49 +++++++++++ 4 files changed, 162 insertions(+), 22 deletions(-) create mode 100644 src/modules/audio_coding/codecs/iSAC/fix/source/lattice_armv7.S create mode 100644 src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk index 872d38873..c81d13a83 100644 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk @@ -45,6 +45,14 @@ LOCAL_SRC_FILES := \ spectrum_ar_model_tables.c \ transform.c +ifeq ($(ARCH_ARM_HAVE_ARMV7A),true) +LOCAL_SRC_FILES += \ + lattice_armv7.S +else +LOCAL_SRC_FILES += \ + lattice_c.c +endif + # Flags passed to both C and C++ files. LOCAL_CFLAGS := \ $(MY_WEBRTC_COMMON_DEFS) @@ -88,7 +96,7 @@ LOCAL_CFLAGS := \ LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../interface \ $(LOCAL_PATH)/../../../../../.. 
\ - $(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include + $(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include ifndef NDK_ROOT diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/lattice.c b/src/modules/audio_coding/codecs/iSAC/fix/source/lattice.c index 0f80d5872..a072d66b1 100644 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/lattice.c +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/lattice.c @@ -35,6 +35,16 @@ case when method 1) gave 650235648 and 2) gave 650235712. */ +/* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter + with coefficients cth_Q15[] and sth_Q15[]. + Implemented for both generic and ARMv7 platforms. + */ +void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, + int16_t* ar_f_Q0, + int16_t* cth_Q15, + int16_t* sth_Q15, + int16_t order_coef); + /* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa(). It does: for 0 <= n < HALF_SUBFRAMELEN - 1: @@ -107,14 +117,14 @@ void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef, for (u=0;u>16 = Q(gain_sh) sh = 9-gain_sh; //number of needed shifts to reach Q9 t16a = (WebRtc_Word16) WEBRTC_SPL_SHIFT_W32(tmp32, sh); - lat_outQ9[n + WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN)] = t16a; + lat_outQ9[n + temp1] = t16a; } /* save the states */ @@ -230,6 +240,8 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef, for (u=0;uQ26 + tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + temp1], 1); //Q25->Q26 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh) tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0 @@ -280,23 +292,12 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef, } ARgQ0vec[0] = ARfQ0vec[0]; - for(n=0;n=0;k--) - { - tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[k], tmpAR)) - (WEBRTC_SPL_MUL_16_16(sthQ15[k], ARgQ0vec[k])) + 16384), 15); - tmp32_2 = 
WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[k], tmpAR)) + (WEBRTC_SPL_MUL_16_16(cthQ15[k], ARgQ0vec[k])) + 16384), 15); - tmpAR = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32); // Q0 - ARgQ0vec[k+1] = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(tmp32_2); // Q0 - } - ARfQ0vec[n+1] = tmpAR; - ARgQ0vec[0] = tmpAR; - } + // Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[]. + WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef); for(n=0;n= 0; k--) + + ldrh r7, [r3, #-2]! @ sth_Q15[k] + ldrh r6, [r2, #-2]! @ cth_Q15[k] + + ldrh r8, [r0, #-2] @ ar_g_Q0[k] + smlabb r11, r7, r5, r12 @ sth_Q15[k] * tmpAR + 16384 + smlabb r10, r6, r5, r12 @ cth_Q15[k] * tmpAR + 16384 + smulbb r7, r7, r8 @ sth_Q15[k] * ar_g_Q0[k] + smlabb r11, r6, r8, r11 @ cth_Q15[k]*ar_g_Q0[k]+(sth_Q15[k]*tmpAR+16384) + + sub r10, r10, r7 @ cth_Q15[k]*tmpAR+16384-(sth_Q15[k]*ar_g_Q0[k]) + ssat r11, #16, r11, asr #15 + ssat r5, #16, r10, asr #15 + strh r11, [r0], #-2 @ Output: ar_g_Q0[k+1] + + subs r9, #1 + bgt ORDER_COEF_LOOP + + strh r5, [r0] @ Output: ar_g_Q0[0] = tmpAR; + strh r5, [r1], #2 @ Output: ar_f_Q0[n+1] = tmpAR; + + subs r4, #1 + bne HALF_SUBFRAME_LOOP + + pop {r4-r11} + bx lr + +.fnend + diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c b/src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c new file mode 100644 index 000000000..80ccf39e1 --- /dev/null +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/lattice_c.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+/*
+ * Generic C version of the inner loop of the AR lattice filter routine
+ * for the iSAC fixed-point codec.
+ */
+
+#include "settings.h"
+#include "signal_processing_library.h"
+#include "typedefs.h"
+
+/* Pushes each sample ar_f_Q0[1 .. HALF_SUBFRAMELEN - 1] through order_coef
+ * lattice stages, from stage order_coef - 1 down to stage 0, with stage
+ * coefficients cth_Q15[] / sth_Q15[] and state ar_g_Q0[0 .. order_coef].
+ * ar_f_Q0[] and ar_g_Q0[] are both overwritten in place.
+ */
+void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,     // In/out: lattice state
+                                int16_t* ar_f_Q0,     // In/out: samples
+                                int16_t* cth_Q15,     // Filter coefficients
+                                int16_t* sth_Q15,     // Filter coefficients
+                                int16_t order_coef) { // Order of the filter
+  int pos = 0;
+
+  for (pos = 1; pos < HALF_SUBFRAMELEN; pos++) {
+    int16_t f_cur = ar_f_Q0[pos];
+    int stage = order_coef;
+    while (stage-- > 0) {  // Body sees stage = order_coef - 1, ..., 0.
+      const int16_t cth = cth_Q15[stage];
+      const int16_t sth = sth_Q15[stage];
+      const int16_t g_prev = ar_g_Q0[stage];  // Read before the write below.
+      int32_t f_next = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cth, f_cur))
+          - (WEBRTC_SPL_MUL_16_16(sth, g_prev)) + 16384), 15);
+      int32_t g_next = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sth, f_cur))
+          + (WEBRTC_SPL_MUL_16_16(cth, g_prev)) + 16384), 15);
+      f_cur = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(f_next);
+      ar_g_Q0[stage + 1] = (WebRtc_Word16)WebRtcSpl_SatW32ToW16(g_next);
+    }
+    ar_f_Q0[pos] = f_cur;
+    ar_g_Q0[0] = f_cur;
+  }
+}