Optimized function AllpassFilter2FixDec16() in isac fix for Android Neon platforms.

With an offline test, codec cycles were reduced by 4%.
Review URL: https://webrtc-codereview.appspot.com/936007

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3066 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2012-11-09 00:39:45 +00:00
parent b952a90038
commit fa65c851fe
8 changed files with 528 additions and 92 deletions

View File

@ -28,6 +28,36 @@ void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io,
const int16_t *coefficient,
int32_t *state);
// Signature shared by the C and Neon implementations of the all-pass filter
// used by the filter bank. One call filters both channels in place. The
// implementations read two factors and two state words per channel
// (indices 0 and 1) and write the updated state words back on return.
typedef void (*AllpassFilter2FixDec16)(
int16_t *data_ch1, // Input and output in channel 1, in Q0
int16_t *data_ch2, // Input and output in channel 2, in Q0
const int16_t *factor_ch1, // Scaling factor for channel 1, in Q15
const int16_t *factor_ch2, // Scaling factor for channel 2, in Q15
const int length, // Length of the data buffers
int32_t *filter_state_ch1, // Filter state for channel 1, in Q16
int32_t *filter_state_ch2); // Filter state for channel 2, in Q16
// Dispatch pointer through which the filter bank calls the all-pass filter.
// It is assigned the C implementation in WebRtcIsacfix_EncoderInit() and the
// Neon implementation in WebRtcIsacfix_InitNeon().
extern AllpassFilter2FixDec16 WebRtcIsacfix_AllpassFilter2FixDec16;
// Portable C implementation of the AllpassFilter2FixDec16 signature.
// Filters data_ch1 and data_ch2 in place and updates both filter states.
// The implementation asserts that length is even.
void WebRtcIsacfix_AllpassFilter2FixDec16C(
int16_t *data_ch1,
int16_t *data_ch2,
const int16_t *factor_ch1,
const int16_t *factor_ch2,
const int length,
int32_t *filter_state_ch1,
int32_t *filter_state_ch2);
// ARM Neon assembly implementation of the AllpassFilter2FixDec16 signature.
// Only declared when Neon is either detected at run time
// (WEBRTC_DETECT_ARM_NEON) or enabled for the whole build
// (WEBRTC_ARCH_ARM_NEON).
// The assembly handles two samples per step and always runs its main loop
// once before a two-sample epilogue, so length must be even and at least 4.
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
void WebRtcIsacfix_AllpassFilter2FixDec16Neon(
int16_t *data_ch1,
int16_t *data_ch2,
const int16_t *factor_ch1,
const int16_t *factor_ch2,
const int length,
int32_t *filter_state_ch1,
int32_t *filter_state_ch2);
#endif
#if defined(__cplusplus) || defined(c_plusplus)
}
#endif

View File

@ -18,38 +18,74 @@
*
*/
#include "codec.h"
#include "filterbank_internal.h"
#include <assert.h>
#include "codec.h"
#include "filterbank_tables.h"
#include "settings.h"
// Declare a function pointer.
AllpassFilter2FixDec16 WebRtcIsacfix_AllpassFilter2FixDec16;
static void AllpassFilter2FixDec16(WebRtc_Word16 *InOut16, //Q0
const WebRtc_Word16 *APSectionFactors, //Q15
WebRtc_Word16 lengthInOut,
WebRtc_Word16 NumberOfSections,
WebRtc_Word32 *FilterState) //Q16
{
int n, j;
WebRtc_Word32 a, b;
void WebRtcIsacfix_AllpassFilter2FixDec16C(
int16_t *data_ch1, // Input and output in channel 1, in Q0
int16_t *data_ch2, // Input and output in channel 2, in Q0
const int16_t *factor_ch1, // Scaling factor for channel 1, in Q15
const int16_t *factor_ch2, // Scaling factor for channel 2, in Q15
const int length, // Length of the data buffers
int32_t *filter_state_ch1, // Filter state for channel 1, in Q16
int32_t *filter_state_ch2) { // Filter state for channel 2, in Q16
int n = 0;
int32_t state0_ch1 = filter_state_ch1[0], state1_ch1 = filter_state_ch1[1];
int32_t state0_ch2 = filter_state_ch2[0], state1_ch2 = filter_state_ch2[1];
int16_t in_out = 0;
int32_t a = 0, b = 0;
for (j=0; j<NumberOfSections; j++) {
for (n=0;n<lengthInOut;n++) {
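// The Neon assembly implementation processes two samples per step and
// therefore assumes an even length; enforce the same requirement here.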
assert(length % 2 == 0);
for (n = 0; n < length; n++) {
// Process channel 1:
in_out = data_ch1[n];
a = WEBRTC_SPL_MUL_16_16(factor_ch1[0], in_out); // Q15 * Q0 = Q15
a <<= 1; // Q15 -> Q16
b = WEBRTC_SPL_ADD_SAT_W32(a, state0_ch1);
a = WEBRTC_SPL_MUL_16_16(-factor_ch1[0], (int16_t) (b >> 16)); // Q15
state0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a << 1, (uint32_t)in_out << 16); // Q16
in_out = (int16_t) (b >> 16); // Save as Q0
a = WEBRTC_SPL_MUL_16_16(APSectionFactors[j], InOut16[n]); //Q15*Q0=Q15
a = WEBRTC_SPL_LSHIFT_W32(a, 1); // Q15 -> Q16
b = WEBRTC_SPL_ADD_SAT_W32(a, FilterState[j]); //Q16+Q16=Q16
a = WEBRTC_SPL_MUL_16_16_RSFT(-APSectionFactors[j], (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(b, 16), 0); //Q15*Q0=Q15
FilterState[j] = WEBRTC_SPL_ADD_SAT_W32(WEBRTC_SPL_LSHIFT_W32(a,1), WEBRTC_SPL_LSHIFT_W32((WebRtc_UWord32)InOut16[n],16)); // Q15<<1 + Q0<<16 = Q16 + Q16 = Q16
InOut16[n] = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(b, 16); //Save as Q0
a = WEBRTC_SPL_MUL_16_16(factor_ch1[1], in_out); // Q15 * Q0 = Q15
a <<= 1; // Q15 -> Q16
b = WEBRTC_SPL_ADD_SAT_W32(a, state1_ch1); // Q16
a = WEBRTC_SPL_MUL_16_16(-factor_ch1[1], (int16_t) (b >> 16)); // Q15
state1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a << 1, (uint32_t)in_out << 16); // Q16
data_ch1[n] = (int16_t) (b >> 16); // Save as Q0
}
// Process channel 2:
in_out = data_ch2[n];
a = WEBRTC_SPL_MUL_16_16(factor_ch2[0], in_out); // Q15 * Q0 = Q15
a <<= 1; // Q15 -> Q16
b = WEBRTC_SPL_ADD_SAT_W32(a, state0_ch2); // Q16
a = WEBRTC_SPL_MUL_16_16(-factor_ch2[0], (int16_t) (b >> 16)); // Q15
state0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a << 1, (uint32_t)in_out << 16); // Q16
in_out = (int16_t) (b >> 16); // Save as Q0
a = WEBRTC_SPL_MUL_16_16(factor_ch2[1], in_out); // Q15 * Q0 = Q15
a <<= 1; // Q15 -> Q16
b = WEBRTC_SPL_ADD_SAT_W32(a, state1_ch2); // Q16
a = WEBRTC_SPL_MUL_16_16(-factor_ch2[1], (int16_t) (b >> 16)); // Q15
state1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a << 1, (uint32_t)in_out << 16); // Q16
data_ch2[n] = (int16_t) (b >> 16); // Save as Q0
}
filter_state_ch1[0] = state0_ch1;
filter_state_ch1[1] = state1_ch1;
filter_state_ch2[0] = state0_ch2;
filter_state_ch2[1] = state1_ch2;
}
void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io,
int16_t len,
const int16_t *coefficient,
@ -105,7 +141,7 @@ void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io,
c = (WebRtc_Word32)WEBRTC_SPL_SAT(536870911, c, -536870912);
state1 = state0;
state0 = WEBRTC_SPL_LSHIFT_W32(c, 2); // Write state as Q4
state0 = WEBRTC_SPL_LSHIFT_W32(c, 2); // Write state as Q4
}
state[0] = state0;
state[1] = state1;
@ -125,8 +161,8 @@ void WebRtcIsacfix_SplitAndFilter1(WebRtc_Word16 *pin,
WebRtc_Word16 tempin_ch1[FRAMESAMPLES/2 + QLOOKAHEAD];
WebRtc_Word16 tempin_ch2[FRAMESAMPLES/2 + QLOOKAHEAD];
WebRtc_Word32 tmpState[WEBRTC_SPL_MUL_16_16(2,(QORDER-1))]; /* 4 */
WebRtc_Word32 tmpState_ch1[2 * (QORDER-1)]; /* 4 */
WebRtc_Word32 tmpState_ch2[2 * (QORDER-1)]; /* 4 */
/* High pass filter */
WebRtcIsacfix_HighpassFilterFixDec32(pin, FRAMESAMPLES, WebRtcIsacfix_kHpStCoeffInQ30, prefiltdata->HPstates_fix);
@ -155,16 +191,25 @@ void WebRtcIsacfix_SplitAndFilter1(WebRtc_Word16 *pin,
/*obtain polyphase components by forward all-pass filtering through each channel */
/* The all pass filtering automatically updates the filter states which are exported in the
prefiltdata structure */
AllpassFilter2FixDec16(tempin_ch1,WebRtcIsacfix_kUpperApFactorsQ15, FRAMESAMPLES/2 , NUMBEROFCHANNELAPSECTIONS, prefiltdata->INSTAT1_fix);
AllpassFilter2FixDec16(tempin_ch2,WebRtcIsacfix_kLowerApFactorsQ15, FRAMESAMPLES/2 , NUMBEROFCHANNELAPSECTIONS, prefiltdata->INSTAT2_fix);
for (k=0;k<WEBRTC_SPL_MUL_16_16(2, (QORDER-1));k++)
tmpState[k] = prefiltdata->INSTAT1_fix[k];
AllpassFilter2FixDec16(tempin_ch1 + FRAMESAMPLES/2,WebRtcIsacfix_kUpperApFactorsQ15, QLOOKAHEAD , NUMBEROFCHANNELAPSECTIONS, tmpState);
for (k=0;k<WEBRTC_SPL_MUL_16_16(2, (QORDER-1));k++)
tmpState[k] = prefiltdata->INSTAT2_fix[k];
AllpassFilter2FixDec16(tempin_ch2 + FRAMESAMPLES/2,WebRtcIsacfix_kLowerApFactorsQ15, QLOOKAHEAD , NUMBEROFCHANNELAPSECTIONS, tmpState);
WebRtcIsacfix_AllpassFilter2FixDec16(tempin_ch1,
tempin_ch2,
WebRtcIsacfix_kUpperApFactorsQ15,
WebRtcIsacfix_kLowerApFactorsQ15,
FRAMESAMPLES/2,
prefiltdata->INSTAT1_fix,
prefiltdata->INSTAT2_fix);
for (k=0;k<WEBRTC_SPL_MUL_16_16(2, (QORDER-1));k++) {
tmpState_ch1[k] = prefiltdata->INSTAT1_fix[k];
tmpState_ch2[k] = prefiltdata->INSTAT2_fix[k];
}
WebRtcIsacfix_AllpassFilter2FixDec16(tempin_ch1 + FRAMESAMPLES/2,
tempin_ch2 + FRAMESAMPLES/2,
WebRtcIsacfix_kUpperApFactorsQ15,
WebRtcIsacfix_kLowerApFactorsQ15,
QLOOKAHEAD,
tmpState_ch1,
tmpState_ch2);
/* Now Construct low-pass and high-pass signals as combinations of polyphase components */
for (k=0; k<FRAMESAMPLES/2 + QLOOKAHEAD; k++) {
@ -217,9 +262,13 @@ void WebRtcIsacfix_SplitAndFilter2(WebRtc_Word16 *pin,
/*obtain polyphase components by forward all-pass filtering through each channel */
/* The all pass filtering automatically updates the filter states which are exported in the
prefiltdata structure */
AllpassFilter2FixDec16(tempin_ch1,WebRtcIsacfix_kUpperApFactorsQ15, FRAMESAMPLES/2 , NUMBEROFCHANNELAPSECTIONS, prefiltdata->INSTAT1_fix);
AllpassFilter2FixDec16(tempin_ch2,WebRtcIsacfix_kLowerApFactorsQ15, FRAMESAMPLES/2 , NUMBEROFCHANNELAPSECTIONS, prefiltdata->INSTAT2_fix);
WebRtcIsacfix_AllpassFilter2FixDec16(tempin_ch1,
tempin_ch2,
WebRtcIsacfix_kUpperApFactorsQ15,
WebRtcIsacfix_kLowerApFactorsQ15,
FRAMESAMPLES/2,
prefiltdata->INSTAT1_fix,
prefiltdata->INSTAT2_fix);
/* Now Construct low-pass and high-pass signals as combinations of polyphase components */
for (k=0; k<FRAMESAMPLES/2; k++) {
@ -267,17 +316,21 @@ void WebRtcIsacfix_FilterAndCombine1(WebRtc_Word16 *tempin_ch1,
int k;
WebRtc_Word16 in[FRAMESAMPLES];
/* all-pass filter the new upper channel signal. HOWEVER, use the all-pass filter factors
that were used as a lower channel at the encoding side. So at the decoder, the
corresponding all-pass filter factors for each channel are swapped.*/
AllpassFilter2FixDec16(tempin_ch1, WebRtcIsacfix_kLowerApFactorsQ15, FRAMESAMPLES/2, NUMBEROFCHANNELAPSECTIONS,postfiltdata->STATE_0_UPPER_fix);
/* Now, all-pass filter the new lower channel signal. But since all-pass filter factors
at the decoder are swapped from the ones at the encoder, the 'upper' channel
all-pass filter factors (kUpperApFactors) are used to filter this new lower channel signal */
AllpassFilter2FixDec16(tempin_ch2, WebRtcIsacfix_kUpperApFactorsQ15, FRAMESAMPLES/2, NUMBEROFCHANNELAPSECTIONS,postfiltdata->STATE_0_LOWER_fix);
/* all-pass filter the new upper and lower channel signal.
For upper channel, use the all-pass filter factors that were used as a
lower channel at the encoding side. So at the decoder, the corresponding
all-pass filter factors for each channel are swapped.
For lower channel signal, since all-pass filter factors at the decoder are
swapped from the ones at the encoder, the 'upper' channel all-pass filter
factors (kUpperApFactors) are used to filter this new lower channel signal.
*/
WebRtcIsacfix_AllpassFilter2FixDec16(tempin_ch1,
tempin_ch2,
WebRtcIsacfix_kLowerApFactorsQ15,
WebRtcIsacfix_kUpperApFactorsQ15,
FRAMESAMPLES/2,
postfiltdata->STATE_0_UPPER_fix,
postfiltdata->STATE_0_LOWER_fix);
/* Merge outputs to form the full length output signal.*/
for (k=0;k<FRAMESAMPLES/2;k++) {
@ -324,17 +377,21 @@ void WebRtcIsacfix_FilterAndCombine2(WebRtc_Word16 *tempin_ch1,
int k;
WebRtc_Word16 in[FRAMESAMPLES];
/* all-pass filter the new upper channel signal. HOWEVER, use the all-pass filter factors
that were used as a lower channel at the encoding side. So at the decoder, the
corresponding all-pass filter factors for each channel are swapped.*/
AllpassFilter2FixDec16(tempin_ch1, WebRtcIsacfix_kLowerApFactorsQ15,(WebRtc_Word16) (len/2), NUMBEROFCHANNELAPSECTIONS,postfiltdata->STATE_0_UPPER_fix);
/* Now, all-pass filter the new lower channel signal. But since all-pass filter factors
at the decoder are swapped from the ones at the encoder, the 'upper' channel
all-pass filter factors (kUpperApFactors) are used to filter this new lower channel signal */
AllpassFilter2FixDec16(tempin_ch2, WebRtcIsacfix_kUpperApFactorsQ15, (WebRtc_Word16) (len/2), NUMBEROFCHANNELAPSECTIONS,postfiltdata->STATE_0_LOWER_fix);
/* all-pass filter the new upper and lower channel signal.
For upper channel, use the all-pass filter factors that were used as a
lower channel at the encoding side. So at the decoder, the corresponding
all-pass filter factors for each channel are swapped.
For lower channel signal, since all-pass filter factors at the decoder are
swapped from the ones at the encoder, the 'upper' channel all-pass filter
factors (kUpperApFactors) are used to filter this new lower channel signal.
*/
WebRtcIsacfix_AllpassFilter2FixDec16(tempin_ch1,
tempin_ch2,
WebRtcIsacfix_kLowerApFactorsQ15,
WebRtcIsacfix_kUpperApFactorsQ15,
len / 2,
postfiltdata->STATE_0_UPPER_fix,
postfiltdata->STATE_0_LOWER_fix);
/* Merge outputs to form the full length output signal.*/
for (k=0;k<len/2;k++) {

View File

@ -0,0 +1,270 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ Contains the function WebRtcIsacfix_AllpassFilter2FixDec16Neon()
@ of the iSAC codec, optimized for the ARM Neon platform. Bit exact with
@ function WebRtcIsacfix_AllpassFilter2FixDec16C() in filterbanks.c. Prototype
@ C code is at the end of this file.
.arch armv7-a
.fpu neon
.global WebRtcIsacfix_AllpassFilter2FixDec16Neon
.align 2
@void WebRtcIsacfix_AllpassFilter2FixDec16Neon(
@ int16_t *data_ch1, // Input and output in channel 1, in Q0
@ int16_t *data_ch2, // Input and output in channel 2, in Q0
@ const int16_t *factor_ch1, // Scaling factor for channel 1, in Q15
@ const int16_t *factor_ch2, // Scaling factor for channel 2, in Q15
@ const int length, // Length of the data buffers
@ int32_t *filter_state_ch1, // Filter state for channel 1, in Q16
@ int32_t *filter_state_ch2); // Filter state for channel 2, in Q16
@ Filters two channels in place, each through two cascaded all-pass sections,
@ handling both channels and both sections in the four lanes of one vector.
@
@ Register arguments: r0 = data_ch1, r1 = data_ch2, r2 = factor_ch1,
@ r3 = factor_ch2. length, filter_state_ch1 and filter_state_ch2 are read
@ from the stack.
@
@ Lane layout (16-bit lanes of d30/d31/d0, 32-bit lanes of q8):
@   lane 0: channel 1, section 0      lane 1: channel 1, section 1
@   lane 2: channel 2, section 0      lane 3: channel 2, section 1
@ Section 1 consumes the section-0 output of the previous step, so it runs
@ one sample behind section 0. After each narrowing shift, lanes 1 and 3 hold
@ finished output samples; they are stored and then reloaded with fresh input
@ samples, and vrev32.16 swaps the halves so the new input lands in the
@ section-0 lanes and the section-0 output lands in the section-1 lanes.
@
@ Preconditions: length is even and at least 4. The main loop is tested at
@ the bottom, so it always handles two samples before the two-sample
@ epilogue runs.
WebRtcIsacfix_AllpassFilter2FixDec16Neon:
push {r4 - r7}
@ Four registers (16 bytes) were pushed, so stack arguments start at [sp, #16].
ldr r5, [sp, #24] @ filter_state_ch2
ldr r6, [sp, #20] @ filter_state_ch1
@ Initialize the Neon registers.
vld1.16 d0[0], [r0]! @ data_ch1[0]
vld1.16 d0[2], [r1]! @ data_ch2[0]
vld1.32 d30[0], [r2] @ factor_ch1[0], factor_ch1[1]
vld1.32 d30[1], [r3] @ factor_ch2[0], factor_ch2[1]
vld1.32 d16[0], [r6]! @ filter_state_ch1[0]
vld1.32 d17[0], [r5]! @ filter_state_ch2[0]
vneg.s16 d31, d30 @ Negated factors, used for the state update
ldr r3, [sp, #16] @ length
mov r4, #4 @ Post offset value for the loop
mov r2, #-2 @ Post offset value for the loop
sub r3, #2 @ Loop counter
@ Loop unrolling pre-processing.
@ Only the section-0 lanes (0 and 2) carry valid data in this first step;
@ the section-1 states are loaded into q8 afterwards.
vqdmull.s16 q1, d30, d0 @ a = 2 * factor * sample
vshll.s16 q0, d0, #16 @ sample << 16
vqadd.s32 q2, q1, q8 @ b = saturating a + state
vshrn.i32 d6, q2, #16 @ (int16_t)(b >> 16)
vmull.s16 q1, d31, d6 @ -factor * (b >> 16)
vshl.s32 q1, #1
vqadd.s32 q8, q1, q0 @ New state
vld1.32 d16[1], [r6] @ filter_state_ch1[1]
vld1.32 d17[1], [r5] @ filter_state_ch2[1]
sub r6, #4 @ &filter_state_ch1[0]
sub r5, #4 @ &filter_state_ch2[0]
vld1.16 d6[1], [r0], r2 @ data_ch1[1]
vld1.16 d6[3], [r1], r2 @ data_ch2[1]
vrev32.16 d0, d6 @ Next input -> section-0 lanes, section-0 output -> section-1 lanes
FOR_LOOP:
@ First half: section 0 of sample n + 1 and section 1 of sample n.
vqdmull.s16 q1, d30, d0
vshll.s16 q0, d0, #16
vqadd.s32 q2, q1, q8
vshrn.i32 d4, q2, #16
vmull.s16 q1, d31, d4
vst1.16 d4[1], [r0], r4 @ Store data_ch1[n]
vst1.16 d4[3], [r1], r4 @ Store data_ch2[n]
vshl.s32 q1, #1
vld1.16 d4[1], [r0], r2 @ Load data_ch1[n + 2]
vld1.16 d4[3], [r1], r2 @ Load data_ch2[n + 2]
vqadd.s32 q8, q1, q0
vrev32.16 d0, d4
@ Second half: section 0 of sample n + 2 and section 1 of sample n + 1.
vqdmull.s16 q1, d30, d0
subs r3, #2 @ Two samples consumed; sets the flags for bgt below
vqadd.s32 q2, q1, q8
vshrn.i32 d6, q2, #16
vmull.s16 q1, d31, d6
vshll.s16 q0, d0, #16
vst1.16 d6[1], [r0], r4 @ Store data_ch1[n + 1]
vst1.16 d6[3], [r1], r4 @ Store data_ch2[n + 1]
vshl.s32 q1, #1
vld1.16 d6[1], [r0], r2 @ Load data_ch1[n + 3]
vld1.16 d6[3], [r1], r2 @ Load data_ch2[n + 3]
vqadd.s32 q8, q1, q0
vrev32.16 d0, d6
bgt FOR_LOOP
@ Loop unrolling post-processing.
@ No further input is loaded. This step finishes section 0 for the last
@ sample, so lanes 0 and 2 of q8 hold the final section-0 states.
vqdmull.s16 q1, d30, d0
vshll.s16 q0, d0, #16
vqadd.s32 q2, q1, q8
vshrn.i32 d4, q2, #16
vmull.s16 q1, d31, d4
vst1.16 d4[1], [r0]! @ Store data_ch1[n]
vst1.16 d4[3], [r1]! @ Store data_ch2[n]
vshl.s32 q1, #1
vqadd.s32 q8, q1, q0
vrev32.16 d0, d4
@ Final step: only the section-1 lanes (1 and 3) are meaningful. The result
@ goes to q9 so the section-0 states in q8 are left untouched.
vqdmull.s16 q1, d30, d0
vshll.s16 q0, d0, #16
vqadd.s32 q2, q1, q8
vshrn.i32 d6, q2, #16
vmull.s16 q1, d31, d6
vst1.16 d6[1], [r0] @ Store data_ch1[n + 1]
vst1.16 d6[3], [r1] @ Store data_ch2[n + 1]
vshl.s32 q1, #1
vst1.32 d16[0], [r6]! @ Store filter_state_ch1[0]
vqadd.s32 q9, q1, q0
vst1.32 d17[0], [r5]! @ Store filter_state_ch2[0]
vst1.32 d18[1], [r6] @ Store filter_state_ch1[1]
vst1.32 d19[1], [r5] @ Store filter_state_ch2[1]
pop {r4 - r7}
bx lr
@void AllpassFilter2FixDec16BothChannels(
@ int16_t *data_ch1, // Input and output in channel 1, in Q0
@ int16_t *data_ch2, // Input and output in channel 2, in Q0
@ const int16_t *factor_ch1, // Scaling factor for channel 1, in Q15
@ const int16_t *factor_ch2, // Scaling factor for channel 2, in Q15
@ const int length, // Length of the data buffers
@ int32_t *filter_state_ch1, // Filter state for channel 1, in Q16
@ int32_t *filter_state_ch2) { // Filter state for channel 2, in Q16
@ int n = 0;
@ int32_t state0_ch1 = filter_state_ch1[0], state1_ch1 = filter_state_ch1[1];
@ int32_t state0_ch2 = filter_state_ch2[0], state1_ch2 = filter_state_ch2[1];
@ int16_t sample0_ch1 = 0, sample0_ch2 = 0;
@ int16_t sample1_ch1 = 0, sample1_ch2 = 0;
@ int32_t a0_ch1 = 0, a0_ch2 = 0;
@ int32_t b0_ch1 = 0, b0_ch2 = 0;
@
@ int32_t a1_ch1 = 0, a1_ch2 = 0;
@ int32_t b1_ch1 = 0, b1_ch2 = 0;
@ int32_t b2_ch1 = 0, b2_ch2 = 0;
@
@ // Loop unrolling preprocessing.
@
@ sample0_ch1 = data_ch1[n];
@ sample0_ch2 = data_ch2[n];
@
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[0], sample0_ch1) << 1;
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[0], sample0_ch2) << 1;
@
@ b0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1, state0_ch1);
@ b0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2, state0_ch2); //Q16+Q16=Q16
@
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[0], (int16_t) (b0_ch1 >> 16));
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[0], (int16_t) (b0_ch2 >> 16));
@
@ state0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1 <<1, (uint32_t)sample0_ch1 << 16);
@ state0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2 <<1, (uint32_t)sample0_ch2 << 16);
@
@ sample1_ch1 = data_ch1[n + 1];
@ sample0_ch1 = (int16_t) (b0_ch1 >> 16); //Save as Q0
@ sample1_ch2 = data_ch2[n + 1];
@ sample0_ch2 = (int16_t) (b0_ch2 >> 16); //Save as Q0
@
@
@ for (n = 0; n < length - 2; n += 2) {
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[0], sample1_ch1) << 1;
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[1], sample0_ch1) << 1;
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[0], sample1_ch2 ) << 1;
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[1], sample0_ch2) << 1;
@
@ b1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1, state0_ch1);
@ b0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1, state1_ch1); //Q16+Q16=Q16
@ b1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2, state0_ch2); //Q16+Q16=Q16
@ b0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2, state1_ch2); //Q16+Q16=Q16
@
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[0], (int16_t) (b1_ch1 >> 16));
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[1], (int16_t) (b0_ch1 >> 16));
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[0], (int16_t) (b1_ch2 >> 16));
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[1], (int16_t) (b0_ch2 >> 16));
@
@ state0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1<<1, (uint32_t)sample1_ch1 <<16);
@ state1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1<<1, (uint32_t)sample0_ch1 <<16);
@ state0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2<<1, (uint32_t)sample1_ch2 <<16);
@ state1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2<<1, (uint32_t)sample0_ch2 <<16);
@
@ sample0_ch1 = data_ch1[n + 2];
@ sample1_ch1 = (int16_t) (b1_ch1 >> 16); //Save as Q0
@ sample0_ch2 = data_ch2[n + 2];
@ sample1_ch2 = (int16_t) (b1_ch2 >> 16); //Save as Q0
@
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[0], sample0_ch1) << 1;
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[1], sample1_ch1) << 1;
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[0], sample0_ch2) << 1;
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[1], sample1_ch2 ) << 1;
@
@ b2_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1, state0_ch1);
@ b1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1, state1_ch1); //Q16+Q16=Q16
@ b2_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2, state0_ch2); //Q16+Q16=Q16
@ b1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2, state1_ch2); //Q16+Q16=Q16
@
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[0], (int16_t) (b2_ch1 >> 16));
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[1], (int16_t) (b1_ch1 >> 16));
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[0], (int16_t) (b2_ch2 >> 16));
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[1], (int16_t) (b1_ch2 >> 16));
@
@ state0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1<<1, (uint32_t)sample0_ch1<<16);
@ state1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1<<1, (uint32_t)sample1_ch1<<16);
@ state0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2<<1, (uint32_t)sample0_ch2<<16);
@ state1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2<<1, (uint32_t)sample1_ch2<<16);
@
@
@ sample1_ch1 = data_ch1[n + 3];
@ sample0_ch1 = (int16_t) (b2_ch1 >> 16); //Save as Q0
@ sample1_ch2 = data_ch2[n + 3];
@ sample0_ch2 = (int16_t) (b2_ch2 >> 16); //Save as Q0
@
@ data_ch1[n] = (int16_t) (b0_ch1 >> 16); //Save as Q0
@ data_ch1[n + 1] = (int16_t) (b1_ch1 >> 16); //Save as Q0
@ data_ch2[n] = (int16_t) (b0_ch2 >> 16);
@ data_ch2[n + 1] = (int16_t) (b1_ch2 >> 16);
@ }
@
@ // Loop unrolling post-processing.
@
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[0], sample1_ch1) << 1;
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[1], sample0_ch1) << 1;
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[0], sample1_ch2 ) << 1;
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[1], sample0_ch2) << 1;
@
@ b1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1, state0_ch1);
@ b0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1, state1_ch1);
@ b1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2, state0_ch2);
@ b0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2, state1_ch2);
@
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[0], (int16_t) (b1_ch1 >> 16));
@ a0_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[1], (int16_t) (b0_ch1 >> 16));
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[0], (int16_t) (b1_ch2 >> 16));
@ a0_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[1], (int16_t) (b0_ch2 >> 16));
@
@ state0_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1<<1, (uint32_t)sample1_ch1 << 16);
@ state1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a0_ch1<<1, (uint32_t)sample0_ch1 << 16);
@ state0_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2<<1, (uint32_t)sample1_ch2 << 16);
@ state1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a0_ch2<<1, (uint32_t)sample0_ch2 << 16);
@
@ data_ch1[n] = (int16_t) (b0_ch1 >> 16); //Save as Q0
@ data_ch2[n] = (int16_t) (b0_ch2 >> 16);
@
@ sample1_ch1 = (int16_t) (b1_ch1 >> 16); //Save as Q0
@ sample1_ch2 = (int16_t) (b1_ch2 >> 16); //Save as Q0
@
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(factor_ch1[1], sample1_ch1) << 1;
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(factor_ch2[1], sample1_ch2 ) << 1;
@
@ b1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1, state1_ch1); //Q16+Q16=Q16
@ b1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2, state1_ch2); //Q16+Q16=Q16
@
@ a1_ch1 = WEBRTC_SPL_MUL_16_16(-factor_ch1[1], (int16_t) (b1_ch1 >> 16));
@ a1_ch2 = WEBRTC_SPL_MUL_16_16(-factor_ch2[1], (int16_t) (b1_ch2 >> 16));
@
@ state1_ch1 = WEBRTC_SPL_ADD_SAT_W32(a1_ch1<<1, (uint32_t)sample1_ch1<<16);
@ state1_ch2 = WEBRTC_SPL_ADD_SAT_W32(a1_ch2<<1, (uint32_t)sample1_ch2<<16);
@
@ data_ch1[n + 1] = (int16_t) (b1_ch1 >> 16); //Save as Q0
@ data_ch2[n + 1] = (int16_t) (b1_ch2 >> 16);
@
@ filter_state_ch1[0] = state0_ch1;
@ filter_state_ch1[1] = state1_ch1;
@ filter_state_ch2[0] = state0_ch2;
@ filter_state_ch2[1] = state1_ch2;
@}

View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "gtest/gtest.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_tables.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
// Test fixture for the iSAC-fix filter bank routines.
class FilterBanksTest : public testing::Test {
 protected:
  // Runs one all-pass filter implementation (passed as a function pointer, so
  // the C and Neon versions share the same check) on a fixed input and
  // compares the filtered samples and the final filter states with reference
  // values.
  //
  // NOTE(review): the helper name does not describe what is tested (it
  // exercises the all-pass filter, not a residual energy computation). It is
  // kept unchanged because the TEST_F body calls it by this name.
  void CalculateResidualEnergyTester(AllpassFilter2FixDec16
                                     AllpassFilter2FixDec16Function) {
    const int kSamples = QLOOKAHEAD;
    const int kState = 2;
    int16_t data_ch1[kSamples] = {0};
    int16_t data_ch2[kSamples] = {0};
    int32_t state_ch1[kState] = {0};
    int32_t state_ch2[kState] = {0};
    const int32_t out_state_ch1[kState] = {-809122714, 1645972152};
    const int32_t out_state_ch2[kState] = {428019288, 1057309936};
    const int32_t out_data_ch1[kSamples] = {0, 0, 347, 10618, 16718, -7089,
        32767, 16913, 27042, 8377, -22973, -28372, -27603, -14804, 398, -25332,
        -11200, 18044, 25223, -6839, 1116, -23984, 32717, 7364};
    const int32_t out_data_ch2[kSamples] = {0, 0, 3010, 22351, 21106, 16969,
        -2095, -664, 3513, -30980, 32767, -23839, 13335, 20289, -6831, 339,
        -17207, 32767, 4959, 6177, 32767, 16599, -4747, 20504};

    // Build the input. Only the low 16 bits of each quotient are kept.
    int sign = 1;
    for (int i = 0; i < kSamples; i++) {
      sign *= -1;
      const int divisor = i * i + 1;
      data_ch1[i] =
          static_cast<int16_t>(sign * WEBRTC_SPL_WORD32_MAX / divisor);
      // The reference vectors were generated with
      // `sign * WEBRTC_SPL_WORD32_MIN / divisor`. For sign == -1 that product
      // overflows int (undefined behavior) and in practice wraps back to
      // WEBRTC_SPL_WORD32_MIN, so the dividend was the same for both signs.
      // Use that value directly: the input is now well defined and the
      // reference outputs below still apply.
      data_ch2[i] = static_cast<int16_t>(WEBRTC_SPL_WORD32_MIN / divisor);
    }

    AllpassFilter2FixDec16Function(data_ch1,
                                   data_ch2,
                                   WebRtcIsacfix_kUpperApFactorsQ15,
                                   WebRtcIsacfix_kLowerApFactorsQ15,
                                   kSamples,
                                   state_ch1,
                                   state_ch2);

    // The filter works in place, so the data buffers now hold the output.
    for (int i = 0; i < kSamples; i++) {
      EXPECT_EQ(out_data_ch1[i], data_ch1[i]);
      EXPECT_EQ(out_data_ch2[i], data_ch2[i]);
    }
    for (int i = 0; i < kState; i++) {
      EXPECT_EQ(out_state_ch1[i], state_ch1[i]);
      EXPECT_EQ(out_state_ch2[i], state_ch2[i]);
    }
  }
};
// Checks every available all-pass filter implementation against the same
// reference vectors.
TEST_F(FilterBanksTest, AllpassFilter2FixDec16Test) {
  // The portable C version is always available.
  CalculateResidualEnergyTester(WebRtcIsacfix_AllpassFilter2FixDec16C);

#if defined(WEBRTC_DETECT_ARM_NEON)
  // Neon support is probed at run time; only test it when the CPU has it.
  const bool cpu_has_neon =
      (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0;
  if (cpu_has_neon) {
    CalculateResidualEnergyTester(WebRtcIsacfix_AllpassFilter2FixDec16Neon);
  }
#elif defined(WEBRTC_ARCH_ARM_NEON)
  // Neon is enabled for the whole build; test it unconditionally.
  CalculateResidualEnergyTester(WebRtcIsacfix_AllpassFilter2FixDec16Neon);
#endif
}
// Runs the high-pass filter in place on a fixed input, starting from a
// non-zero state, and compares every output sample with reference values.
TEST_F(FilterBanksTest, HighpassFilterFixDec32Test) {
  const int kSamples = 20;
  int32_t hp_state[2] = {12345, 987654};

  // Reference output; the ARMv7 build has its own set of expected values.
#if defined(WEBRTC_ARCH_ARM_V7)
  int32_t out[kSamples] = {-1040, -1035, -22875, -1397, -27604, 20018, 7917,
      -1279, -8552, -14494, -7558, -23537, -27258, -30554, -32768, -3432,
      -32768, 25215, -27536, 22436};
#else
  int32_t out[kSamples] = {-1040, -1035, -22875, -1397, -27604, 20017, 7915,
      -1280, -8554, -14496, -7561, -23541, -27263, -30560, -32768, -3441,
      -32768, 25203, -27550, 22419};
#endif

  // Input samples: the quotient is narrowed to 16 bits on assignment.
  int16_t in[kSamples];
  for (int n = 0; n != kSamples; ++n) {
    in[n] = WEBRTC_SPL_WORD32_MAX / (n + 1);
  }

  WebRtcIsacfix_HighpassFilterFixDec32(in,
                                       kSamples,
                                       WebRtcIsacfix_kHPStCoeffOut1Q30,
                                       hp_state);

  // The filter overwrites its input buffer with the filtered signal.
  for (int i = 0; i < kSamples; i++) {
    EXPECT_EQ(out[i], in[i]);
  }
}

View File

@ -22,6 +22,7 @@
#include "modules/audio_coding/codecs/isac/fix/source/bandwidth_estimator.h"
#include "modules/audio_coding/codecs/isac/fix/source/codec.h"
#include "modules/audio_coding/codecs/isac/fix/source/entropy_coding.h"
#include "modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"
#include "modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
#include "modules/audio_coding/codecs/isac/fix/source/structs.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
@ -183,6 +184,8 @@ static void WebRtcIsacfix_InitNeon(void) {
WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopNeon;
WebRtcIsacfix_CalculateResidualEnergy =
WebRtcIsacfix_CalculateResidualEnergyNeon;
WebRtcIsacfix_AllpassFilter2FixDec16 =
WebRtcIsacfix_AllpassFilter2FixDec16Neon;
}
#endif
@ -269,6 +272,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopC;
WebRtcIsacfix_CalculateResidualEnergy =
WebRtcIsacfix_CalculateResidualEnergyC;
WebRtcIsacfix_AllpassFilter2FixDec16 =
WebRtcIsacfix_AllpassFilter2FixDec16C;
#ifdef WEBRTC_DETECT_ARM_NEON
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {

View File

@ -97,6 +97,7 @@
'<(webrtc_root)/common_audio/common_audio.gyp:signal_processing',
],
'sources': [
'filterbanks_neon.S',
'filters_neon.c',
'lattice_neon.S',
'lpc_masking_model_neon.S',

View File

@ -7,15 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <typedefs.h>
#include "gtest/gtest.h"
#include "modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"
#include "modules/audio_coding/codecs/isac/fix/source/filterbank_tables.h"
#include "modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
class IsacUnitTest : public testing::Test {
class LpcMaskingModelTest : public testing::Test {
protected:
// Pass a function pointer to the Tester function.
void CalculateResidualEnergyTester(CalculateResidualEnergy
@ -38,7 +36,7 @@ class IsacUnitTest : public testing::Test {
// Test the code path where (residual_energy < 0x10000)
// and ((energy & 0x8000) != 0).
for(int i = 0; i < kIntOrder + 1; i++) {
for (int i = 0; i < kIntOrder + 1; i++) {
a[i] = 24575 >> i;
corr[i] = i;
}
@ -48,7 +46,7 @@ class IsacUnitTest : public testing::Test {
EXPECT_EQ(26, q_shift_residual);
// Test the code path where (residual_energy <= 0x7fff).
for(int i = 0; i < kIntOrder + 1; i++) {
for (int i = 0; i < kIntOrder + 1; i++) {
a[i] = 2457 >> i;
}
residual_energy = CalculateResidualEnergyFunction(kIntOrder,
@ -58,7 +56,7 @@ class IsacUnitTest : public testing::Test {
}
};
TEST_F(IsacUnitTest, CalculateResidualEnergyTest) {
TEST_F(LpcMaskingModelTest, CalculateResidualEnergyTest) {
CalculateResidualEnergyTester(WebRtcIsacfix_CalculateResidualEnergyC);
#ifdef WEBRTC_DETECT_ARM_NEON
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
@ -68,29 +66,3 @@ TEST_F(IsacUnitTest, CalculateResidualEnergyTest) {
CalculateResidualEnergyTester(WebRtcIsacfix_CalculateResidualEnergyNeon);
#endif
}
TEST_F(IsacUnitTest, HighpassFilterFixDec32Test) {
const int kSamples = 20;
int16_t in[kSamples];
int32_t state[2] = {12345, 987654};
#ifdef WEBRTC_ARCH_ARM_V7
int32_t out[kSamples] = {-1040, -1035, -22875, -1397, -27604, 20018, 7917,
-1279, -8552, -14494, -7558, -23537, -27258, -30554, -32768, -3432, -32768,
25215, -27536, 22436};
#else
int32_t out[kSamples] = {-1040, -1035, -22875, -1397, -27604, 20017, 7915,
-1280, -8554, -14496, -7561, -23541, -27263, -30560, -32768, -3441, -32768,
25203, -27550, 22419};
#endif
for(int i = 0; i < kSamples; i++) {
in[i] = WEBRTC_SPL_WORD32_MAX / (i + 1);
}
WebRtcIsacfix_HighpassFilterFixDec32(in, kSamples,
WebRtcIsacfix_kHPStCoeffOut1Q30, state);
for(int i = 0; i < kSamples; i++) {
EXPECT_EQ(out[i], in[i]);
}
}

View File

@ -32,7 +32,8 @@
'<(webrtc_root)/test/test.gyp:test_support_main',
],
'sources': [
'fix/test/isacfix_unittest.cc',
'fix/source/lpc_masking_model_unittest.cc',
'fix/source/filterbanks_unittest.cc',
],
},
],