Optimized WebRtcIsacfix_Spec2Time() for iSAC-Fix on ARM NEON processors. Speed doubled.

Review URL: https://webrtc-codereview.appspot.com/930033

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3274 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org
2012-12-12 23:00:52 +00:00
parent 1b60ceb499
commit fa5b6bf4f4
9 changed files with 573 additions and 14 deletions

View File

@@ -90,7 +90,8 @@ LOCAL_SRC_FILES := \
filterbanks_neon.S \
filters_neon.S \
lattice_neon.S \
lpc_masking_model_neon.S
lpc_masking_model_neon.S \
transform_neon.S
# Flags passed to both C and C++ files.
LOCAL_CFLAGS := \

View File

@@ -75,12 +75,23 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9,
WebRtc_Word16 *outre,
WebRtc_Word16 *outim);
/* Signature shared by all Spec2Time implementations: pointers to two 16-bit
 * buffers (inreQ7, inimQ7) and two 32-bit buffers (outre1Q16, outre2Q16).
 * The names suggest Q7 spectrum input and Q16 time-domain output; confirm
 * against the implementation. */
typedef void (*Spec2Time)(WebRtc_Word16* inreQ7,
WebRtc_Word16* inimQ7,
WebRtc_Word32* outre1Q16,
WebRtc_Word32* outre2Q16);
/* Pointer to the Spec2Time implementation currently selected; it is assigned
 * either WebRtcIsacfix_Spec2TimeC or WebRtcIsacfix_Spec2TimeNeon by the
 * initialization code. */
extern Spec2Time WebRtcIsacfix_Spec2Time;
/* Generic C implementation. */
void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16* inreQ7,
WebRtc_Word16* inimQ7,
WebRtc_Word32* outre1Q16,
WebRtc_Word32* outre2Q16);
/* NOTE(review): this prototype declares WebRtcIsacfix_Spec2Time as a function,
 * while the extern above declares the same identifier as a Spec2Time pointer.
 * The two declarations cannot coexist in one translation unit; presumably this
 * is the superseded declaration and only the pointer is meant to remain.
 * Confirm. */
void WebRtcIsacfix_Spec2Time(WebRtc_Word16 *inreQ7,
WebRtc_Word16 *inimQ7,
WebRtc_Word32 *outre1Q16,
WebRtc_Word32 *outre2Q16);
/* ARM NEON assembly implementation; only declared when NEON is either
 * detected at run time or assumed at build time. */
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16* inreQ7,
WebRtc_Word16* inimQ7,
WebRtc_Word32* outre1Q16,
WebRtc_Word32* outre2Q16);
#endif

View File

@@ -182,6 +182,7 @@ WebRtc_Word16 WebRtcIsacfix_FreeInternal(ISACFIX_MainStruct *ISAC_main_inst)
static void WebRtcIsacfix_InitNeon(void) {
WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrNeon;
WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopNeon;
WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeNeon;
WebRtcIsacfix_CalculateResidualEnergy =
WebRtcIsacfix_CalculateResidualEnergyNeon;
WebRtcIsacfix_AllpassFilter2FixDec16 =
@@ -274,6 +275,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
WebRtcIsacfix_CalculateResidualEnergyC;
WebRtcIsacfix_AllpassFilter2FixDec16 =
WebRtcIsacfix_AllpassFilter2FixDec16C;
WebRtcIsacfix_Spec2Time =
WebRtcIsacfix_Spec2TimeC;
#ifdef WEBRTC_DETECT_ARM_NEON
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {

View File

@@ -101,6 +101,7 @@
'filters_neon.S',
'lattice_neon.S',
'lpc_masking_model_neon.S',
'transform_neon.S',
],
},
],

View File

@@ -15,13 +15,14 @@
*
*/
#include "fft.h"
#include "codec.h"
#include "settings.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
/* Cosine table 1 in Q14 */
static const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
@@ -50,7 +51,7 @@ static const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
/* Sine table 1 in Q14 */
static const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926,
2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033,
4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071,
@@ -79,7 +80,7 @@ static const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
/* Cosine table 2 in Q14 */
static const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = {
const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = {
107, -322, 536, -750, 965, -1179, 1392, -1606, 1819, -2032,
2245, -2457, 2669, -2880, 3091, -3301, 3511, -3720, 3929, -4137,
4344, -4550, 4756, -4961, 5165, -5368, 5570, -5771, 5971, -6171,
@@ -96,7 +97,7 @@ static const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = {
/* Sine table 2 in Q14 */
static const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853,
15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178,
@@ -111,7 +112,8 @@ static const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
};
// Declare a function pointer.
Spec2Time WebRtcIsacfix_Spec2Time;
void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9,
WebRtc_Word16 *inre2Q9,
@@ -200,7 +202,7 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9,
}
void WebRtcIsacfix_Spec2Time(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebRtc_Word32 *outre1Q16, WebRtc_Word32 *outre2Q16)
void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebRtc_Word32 *outre1Q16, WebRtc_Word32 *outre2Q16)
{
int k;

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_
#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_

/* Headers are included before the extern "C" block is opened so that their
 * contents are not forced into C linkage when this file is used from C++. */
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
#include "webrtc/typedefs.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Trigonometric lookup tables used by the transform code. They have external
 * linkage so that implementations outside the defining C file (such as the
 * NEON assembly, which loads their addresses by name) can reference them.
 * FRAMESAMPLES is presumably provided by settings.h. */

/* Cosine table 1 in Q14 */
extern const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2];
/* Sine table 1 in Q14 */
extern const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2];
/* Cosine table 2 in Q14 */
extern const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4];
/* Sine table 2 in Q14 */
extern const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4];

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ */

View File

@@ -0,0 +1,382 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ The reference C implementation is in transform.c. The output of this file is
@ not bit-exact with the C code, because rounding is done differently by the C
@ code and by the ARM instructions, but the quality of the assembly version is
@ not worse.
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
#include "webrtc/system_wrappers/interface/asm_defines.h"
GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon

@ void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16 *inreQ7,
@                                  WebRtc_Word16 *inimQ7,
@                                  WebRtc_Word32 *outre1Q16,
@                                  WebRtc_Word32 *outre2Q16);
@
@ Arguments (r0-r3), each pointing at FRAMESAMPLES/2 elements:
@   r0  inreQ7     16-bit input; overwritten, it doubles as the FFT real buffer.
@   r1  inimQ7     16-bit input; overwritten, it doubles as the FFT imag buffer.
@   r2  outre1Q16  32-bit output.
@   r3  outre2Q16  32-bit output.
@
@ Stages:
@   1. TRANSFORM_AND_FIND_MAX   rotate the input by kCosTab2/kSinTab2, combine
@                               it into two sequences in outre1Q16/outre2Q16
@                               and track the largest magnitude.
@   2. PRE_FFT_SHIFT            scale by sh = norm(max) - 24 and narrow into
@                               inreQ7/inimQ7.
@   3. WebRtcIsacfix_FftRadix16Fastest(inreQ7, inimQ7, 1).
@   4. POST_FFT_SHIFT_DIVIDE    undo the scaling and multiply by 273 / 2^16.
@   5. DEMODULATE_AND_SEPARATE  rotate by kCosTab1/kSinTab1 and apply the
@                               factor 31727 (Q11).
@
@ Stack frame: r4-r11 and lr (36 bytes), q4-q7 (64 bytes) and a 20-byte local
@ area holding the four arguments plus one pad word. The pad word makes the
@ frame 120 bytes so that sp is 8-byte aligned at the call to the C FFT
@ routine, as the AAPCS requires at public interfaces.
DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon
.align 2
  push {r4-r11,lr}
  vpush {q4-q7}
  sub sp, sp, #20             @ 4 saved arguments + 1 pad word (alignment)
  str r0, [sp]                @ inreQ7
  str r1, [sp, #4]            @ inimQ7
  str r2, [sp, #8]            @ outre1Q16
  str r3, [sp, #12]           @ outre2Q16

  mov r8, #(FRAMESAMPLES - 16)
  add r12, r0, r8             @ &inreQ7[FRAMESAMPLES/2 - 8]
  add r11, r1, r8             @ &inimQ7[FRAMESAMPLES/2 - 8]
  add r4, r2, r8, lsl #1      @ &outRe1Q16[FRAMESAMPLES/2 - 8]
  add r6, r3, r8, lsl #1      @ &outRe2Q16[FRAMESAMPLES/2 - 8]

  mov r8, #(FRAMESAMPLES / 2) @ loop counter
  ldr r9, =kCosTab2
  ldr r10, =kSinTab2
  mov r5, #-32                @ Backward step for the 32-bit output pointers.
  mov r7, #-16                @ Backward step for the 16-bit input pointers.
  vmov.u32 q6, #0             @ Running maximum of |outre1Q16[]|.
  vmov.u32 q7, #0             @ Running maximum of |outre2Q16[]|.

@ Each iteration consumes 8 samples from the front and 8 from the back of the
@ inputs, hence the counter step of 16.
TRANSFORM_AND_FIND_MAX:
@ Use ">> 5", instead of "<< 9" and then ">> 14" as in the C code.
@ Bit-exact.
  vld1.16 {q0}, [r9]!         @ kCosTab2[]
  vld1.16 {q1}, [r10]!        @ kSinTab2[]
  vld1.16 {q2}, [r0]!         @ inreQ7[]
  vld1.16 {q3}, [r1]!         @ inimQ7[]
  vmull.s16 q8, d0, d4        @ kCosTab2[k] x inreQ7[k]
  vmull.s16 q9, d1, d5        @ kCosTab2[k] x inreQ7[k]
  vmull.s16 q10, d2, d6       @ kSinTab2[k] x inimQ7[k]
  vmull.s16 q11, d3, d7       @ kSinTab2[k] x inimQ7[k]
  vmull.s16 q12, d0, d6       @ kCosTab2[k] x inimQ7[k]
  vmull.s16 q13, d1, d7       @ kCosTab2[k] x inimQ7[k]
  vmull.s16 q14, d2, d4       @ kSinTab2[k] x inreQ7[k]
  vmull.s16 q15, d3, d5       @ kSinTab2[k] x inreQ7[k]
  vld1.16 {q2}, [r11], r7     @ inimQ7[] block from the tail, walking backwards
  vld1.16 {q3}, [r12], r7     @ inreQ7[] block from the tail, walking backwards
  vadd.s32 q8, q8, q10
  vadd.s32 q9, q9, q11
  vsub.s32 q12, q12, q14
  vsub.s32 q13, q13, q15
  subs r8, #16                @ Flags are consumed by the bgt at the loop end;
                              @ the NEON instructions in between leave them
                              @ untouched.
  vrev64.16 q2, q2            @ Reverse the order of the samples
  vrev64.16 q3, q3            @ Reverse the order of the samples
  vshr.s32 q8, q8, #5         @ xrQ16
  vshr.s32 q9, q9, #5         @ xrQ16
  vshr.s32 q12, q12, #5       @ xiQ16
  vshr.s32 q13, q13, #5       @ xiQ16
@ vrev64 only reverses within each d register, so the high and low halves of
@ the tail blocks are used crosswise below to complete the reversal.
  vmull.s16 q10, d0, d7       @ kCosTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q11, d1, d6       @ kCosTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q14, d2, d5       @ kSinTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q15, d3, d4       @ kSinTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q4, d0, d5        @ kCosTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q5, d1, d4        @ kCosTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q0, d2, d7        @ kSinTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k]
  vmull.s16 q2, d3, d6        @ kSinTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k]
  vsub.s32 q14, q14, q10      @ kSinTab2[k] * inimQ7[] - kCosTab2[k] * inreQ7[]
  vsub.s32 q15, q15, q11      @ kSinTab2[k] * inimQ7[] - kCosTab2[k] * inreQ7[]
  vadd.s32 q10, q4, q0        @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
  vadd.s32 q11, q5, q2        @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
  vshr.s32 q14, q14, #5       @ yiQ16
  vshr.s32 q15, q15, #5       @ yiQ16
  vneg.s32 q10, q10
  vneg.s32 q11, q11
  @ xrQ16 - yiQ16
  vsub.s32 q0, q8, q14
  vsub.s32 q1, q9, q15
  vshr.s32 q10, q10, #5       @ yrQ16
  vshr.s32 q11, q11, #5       @ yrQ16
  @ xrQ16 + yiQ16
  vadd.s32 q3, q8, q14
  vadd.s32 q2, q9, q15
  @ yrQ16 + xiQ16
  vadd.s32 q4, q10, q12
  vadd.s32 q5, q11, q13
  @ yrQ16 - xiQ16
  vsub.s32 q9, q10, q12
  vsub.s32 q8, q11, q13
  @ Reverse the order of the samples
  vrev64.32 q2, q2
  vrev64.32 q3, q3
  vrev64.32 q8, q8
  vrev64.32 q9, q9
  vswp d4, d5
  vswp d6, d7
  vswp d16, d17
  vswp d18, d19
  vst1.32 {q0, q1}, [r2]!     @ outre1Q16[k]
  vst1.32 {q2, q3}, [r4], r5  @ outre1Q16[FRAMESAMPLES/2 - 1 - k]
  vst1.32 {q4, q5}, [r3]!     @ outre2Q16[k]
  vst1.32 {q8, q9}, [r6], r5  @ outre2Q16[FRAMESAMPLES/2 - 1 - k]
  @ Find the absolute maximum in the vectors and store them in q6 and q7.
  vabs.s32 q10, q0
  vabs.s32 q11, q1
  vabs.s32 q12, q2
  vabs.s32 q13, q3
  vabs.s32 q14, q4
  vmax.u32 q6, q10            @ Use u32 so we don't lose the value 0x80000000.
  vmax.u32 q7, q14            @ Maximum for outre2Q16[].
  vabs.s32 q15, q5
  vmax.u32 q6, q11            @ Maximum for outre1Q16[].
  vmax.u32 q7, q15
  vabs.s32 q0, q8
  vmax.u32 q6, q12
  vmax.u32 q7, q0
  vabs.s32 q1, q9
  vmax.u32 q6, q13
  vmax.u32 q7, q1
  bgt TRANSFORM_AND_FIND_MAX

  @ Find the maximum value in the Neon registers
  vmax.u32 d12, d13
  vmax.u32 d14, d15
  vpmax.u32 d12, d12, d12     @ Both 32 bits words hold the same value tmpInIm.
  vpmax.u32 d14, d14, d14     @ Both 32 bits words hold the same value tmpInRe.
  vmax.s32 d14, d12, d14      @ if (tmpInIm>tmpInRe) tmpInRe = tmpInIm;
  ldr r4, [sp]                @ inreQ7
  vcls.s32 d15, d14           @ sh = WebRtcSpl_NormW32(tmpInRe);
  ldr r5, [sp, #4]            @ inimQ7
  vmov.i32 d14, #24           @ sh = sh-24;
  ldr r6, [sp, #8]            @ outre1Q16
  vsub.s32 d15, d15, d14
  ldr r7, [sp, #12]           @ outre2Q16
  vdup.s32 q8, d15[0]         @ sh
  @ Keep a copy of sh in r11 (no longer needed as a pointer). q8-q15 are
  @ caller-saved under the AAPCS, so q8 cannot be trusted after the call to
  @ the C FFT routine, whereas r11 is callee-saved.
  vmov.32 r11, d15[0]
  mov r8, #(FRAMESAMPLES / 2)

@ Scale outre1Q16/outre2Q16 by sh (rounding shift; a negative sh shifts right)
@ and narrow the results to 16 bits into inreQ7/inimQ7, 16 samples of each per
@ iteration.
PRE_FFT_SHIFT:
  vld1.32 {q0, q1}, [r6]!     @ outre1Q16[]
  vld1.32 {q2, q3}, [r6]!     @ outre1Q16[]
  vld1.32 {q4, q5}, [r7]!     @ outre2Q16[]
  vld1.32 {q6, q7}, [r7]!     @ outre2Q16[]
  subs r8, #16
  vrshl.s32 q0, q0, q8
  vrshl.s32 q1, q1, q8
  vrshl.s32 q2, q2, q8
  vrshl.s32 q3, q3, q8
  vrshl.s32 q4, q4, q8
  vrshl.s32 q5, q5, q8
  vrshl.s32 q6, q6, q8
  vrshl.s32 q7, q7, q8
  vmovn.s32 d0, q0
  vmovn.s32 d1, q1
  vmovn.s32 d2, q2
  vmovn.s32 d3, q3
  vmovn.s32 d4, q4
  vmovn.s32 d5, q5
  vmovn.s32 d6, q6
  vmovn.s32 d7, q7
  vst1.16 {q0, q1}, [r4]!     @ inreQ7[]
  vst1.16 {q2, q3}, [r5]!     @ inimQ7[]
  bgt PRE_FFT_SHIFT

  ldr r0, [sp]                @ inreQ7
  ldr r1, [sp, #4]            @ inimQ7
  mov r2, #1
  bl WebRtcIsacfix_FftRadix16Fastest(PLT)

  ldr r4, [sp]                @ inreQ7
  ldr r5, [sp, #4]            @ inimQ7
  ldr r6, [sp, #8]            @ outre1Q16
  ldr r7, [sp, #12]           @ outre2Q16
  mov r8, #(FRAMESAMPLES / 2)
  vdup.s32 q5, r11            @ sh, recovered from the callee-saved copy
  vneg.s32 q5, q5             @ -sh
  movw r0, #273
  vdup.s32 d8, r0

@ Widen the FFT output back to 32 bits, undo the pre-FFT scaling (shift by
@ -sh) and multiply by 273 / 2^16 (approximately 1/240).
POST_FFT_SHIFT_DIVIDE:
  vld1.16 {q0, q1}, [r4]!     @ inreQ7
  vld1.16 {q2, q3}, [r5]!     @ inimQ7
  subs r8, #16
  vmovl.s16 q6, d0
  vmovl.s16 q7, d1
  vmovl.s16 q8, d2
  vmovl.s16 q9, d3
  vmovl.s16 q0, d4
  vmovl.s16 q1, d5
  vmovl.s16 q2, d6
  vmovl.s16 q3, d7
  vshl.s32 q6, q6, q5
  vshl.s32 q7, q7, q5
  vshl.s32 q8, q8, q5
  vshl.s32 q9, q9, q5
  vshl.s32 q0, q0, q5
  vshl.s32 q1, q1, q5
  vshl.s32 q2, q2, q5
  vshl.s32 q3, q3, q5
  @ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre1Q16[k])
  vmull.s32 q10, d12, d8
  vmull.s32 q11, d13, d8
  vmull.s32 q12, d14, d8
  vmull.s32 q13, d15, d8
  vshrn.s64 d12, q10, #16
  vshrn.s64 d13, q11, #16
  vshrn.s64 d14, q12, #16
  vshrn.s64 d15, q13, #16
  vmull.s32 q10, d16, d8
  vmull.s32 q11, d17, d8
  vmull.s32 q12, d18, d8
  vmull.s32 q13, d19, d8
  vshrn.s64 d16, q10, #16
  vshrn.s64 d17, q11, #16
  vshrn.s64 d18, q12, #16
  vshrn.s64 d19, q13, #16
  @ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre2Q16[k])
  vmull.s32 q10, d0, d8
  vmull.s32 q11, d1, d8
  vmull.s32 q12, d2, d8
  vmull.s32 q13, d3, d8
  vshrn.s64 d0, q10, #16
  vshrn.s64 d1, q11, #16
  vshrn.s64 d2, q12, #16
  vshrn.s64 d3, q13, #16
  vmull.s32 q10, d4, d8
  vmull.s32 q11, d5, d8
  vmull.s32 q12, d6, d8
  vmull.s32 q13, d7, d8
  vshrn.s64 d4, q10, #16
  vshrn.s64 d5, q11, #16
  vshrn.s64 d6, q12, #16
  vshrn.s64 d7, q13, #16
  vst1.32 {q6, q7}, [r6]!     @ outre1Q16[]
  vst1.32 {q8, q9}, [r6]!     @ outre1Q16[]
  vst1.32 {q0, q1}, [r7]!     @ outre2Q16[]
  vst1.32 {q2, q3}, [r7]!     @ outre2Q16[]
  bgt POST_FFT_SHIFT_DIVIDE

  mov r8, #(FRAMESAMPLES / 2)
  ldr r9, =kCosTab1
  ldr r10, =kSinTab1
  ldr r2, [sp, #8]            @ outre1Q16
  ldr r3, [sp, #12]           @ outre2Q16
  movw r0, #31727             @ factQ11

@ Processes 8 samples of each output array per iteration, in place: the
@ pointers r2/r3 are only advanced by the stores at the end of the body.
DEMODULATE_AND_SEPARATE:
  vld1.16 {q0}, [r9]!         @ kCosTab1[]
  vld1.16 {q1}, [r10]!        @ kSinTab1[]
  vld1.32 {q2, q3}, [r2]      @ outre1Q16
  vld1.32 {q4, q5}, [r3]      @ outre2Q16
  vmovl.s16 q6, d0            @ kCosTab1[]
  vmovl.s16 q7, d1            @ kCosTab1[]
  vmovl.s16 q8, d2            @ kSinTab1[]
  vmovl.s16 q9, d3            @ kSinTab1[]
  vmull.s32 q10, d12, d4      @ kCosTab1[k] * outre1Q16[k]
  vmull.s32 q11, d13, d5      @ kCosTab1[k] * outre1Q16[k]
  vmull.s32 q12, d14, d6      @ kCosTab1[k] * outre1Q16[k]
  vmull.s32 q13, d15, d7      @ kCosTab1[k] * outre1Q16[k]
  vmull.s32 q0, d16, d8       @ kSinTab1[k] * outre2Q16[k]
  vmull.s32 q1, d17, d9       @ kSinTab1[k] * outre2Q16[k]
  vmull.s32 q14, d18, d10     @ kSinTab1[k] * outre2Q16[k]
  vmull.s32 q15, d19, d11     @ kSinTab1[k] * outre2Q16[k]
  vsub.s64 q10, q10, q0
  vsub.s64 q11, q11, q1
  vsub.s64 q12, q12, q14
  vsub.s64 q13, q13, q15
  vrshrn.s64 d20, q10, #14    @ xrQ16
  vrshrn.s64 d21, q11, #14    @ xrQ16
  vrshrn.s64 d22, q12, #14    @ xrQ16
  vrshrn.s64 d23, q13, #14    @ xrQ16
  subs r8, #8
  vmull.s32 q12, d12, d8      @ kCosTab1[k] * outre2Q16[k]
  vmull.s32 q13, d13, d9      @ kCosTab1[k] * outre2Q16[k]
  vmull.s32 q14, d14, d10     @ kCosTab1[k] * outre2Q16[k]
  vmull.s32 q15, d15, d11     @ kCosTab1[k] * outre2Q16[k]
  @ d9 (outre2Q16 data) has been fully consumed above, so it is reused to
  @ hold factQ11.
  vdup.s32 d9, r0             @ generic -> Neon doesn't cost extra cycles.
  vmull.s32 q0, d16, d4       @ kSinTab1[k] * outre1Q16[k]
  vmull.s32 q1, d17, d5       @ kSinTab1[k] * outre1Q16[k]
  vmull.s32 q6, d18, d6       @ kSinTab1[k] * outre1Q16[k]
  vmull.s32 q7, d19, d7       @ kSinTab1[k] * outre1Q16[k]
  vadd.s64 q12, q12, q0
  vadd.s64 q13, q13, q1
  vadd.s64 q14, q14, q6
  vadd.s64 q15, q15, q7
  vrshrn.s64 d24, q12, #14    @ xiQ16
  vrshrn.s64 d25, q13, #14    @ xiQ16
  vrshrn.s64 d26, q14, #14    @ xiQ16
  vrshrn.s64 d27, q15, #14    @ xiQ16
  @ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16)
  vmull.s32 q0, d20, d9
  vmull.s32 q1, d21, d9
  vmull.s32 q2, d22, d9
  vmull.s32 q3, d23, d9
  vrshrn.s64 d0, q0, #11
  vrshrn.s64 d1, q1, #11
  vrshrn.s64 d2, q2, #11
  vrshrn.s64 d3, q3, #11
  @ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xiQ16)
  vmull.s32 q6, d24, d9
  vmull.s32 q7, d25, d9
  vmull.s32 q8, d26, d9
  vmull.s32 q9, d27, d9
  vrshrn.s64 d4, q6, #11
  vrshrn.s64 d5, q7, #11
  vrshrn.s64 d6, q8, #11
  vrshrn.s64 d7, q9, #11
  vst1.16 {q0, q1}, [r2]!     @ outre1Q16[]
  vst1.16 {q2, q3}, [r3]!     @ outre2Q16[]
  bgt DEMODULATE_AND_SEPARATE

  add sp, sp, #20             @ Must match the local area reserved on entry.
  vpop {q4-q7}
  pop {r4-r11,pc}

View File

@@ -0,0 +1,121 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
// Fixture for the iSAC-fix transform tests. Construction calls
// WebRtcSpl_Init() before any test body runs.
class TransformTest : public testing::Test {
 protected:
  TransformTest() {
    WebRtcSpl_Init();
  }

  // Feeds a fixed, deterministic input to |Spec2TimeFunction| and compares
  // both output arrays against recorded reference values, allowing a small
  // per-sample deviation.
  void Spec2TimeTester(Spec2Time Spec2TimeFunction) {
    // The Spec2Time implementations hard-code their buffer lengths, so the
    // test has to provide full-size (FRAMESAMPLES/2 element) buffers.
    const int kSamples = FRAMESAMPLES/2;
    // Largest accepted absolute difference per output sample.
    const int kMaxAbsError = 16;

    int16_t data_in_1[kSamples] = {0};
    int16_t data_in_2[kSamples] = {0};
    int32_t data_out_1[kSamples] = {0};
    int32_t data_out_2[kSamples] = {0};
    int32_t out_expected_1[kSamples]= {-3366470, -2285227, -3415765,
      -2310215, -3118030, -2222470, -3030254, -2192091, -3423170, -2216041,
      -3305541, -2171936, -3195767, -2095779, -3153304, -2157560, -3071167,
      -2032108, -3101190, -1972016, -3103824, -2089118, -3139811, -1898337,
      -3102801, -2055082, -3029665, -1854140, -2962586, -1966454, -3071167,
      -1894588, -2851743, -1917315, -2848087, -1594932, -2799242, -1462184,
      -2845887, -1437599, -2691776, -1329637, -2770659, -1268491, -2625161,
      -1578991, -2460299, -1186385, -2365613, -1039354, -2322608, -958518,
      -2271749, -789860, -2254538, -850308, -2384436, -850959, -2133734,
      -587678, -2093316, -495115, -1973364, -475177, -1801282, -173507,
      -1848516, -158015, -1792018, -62648, -1643313, 214746, -1500758, 267077,
      -1450193, 560521, -1521579, 675283, -1345408, 857559, -1300822, 1116332,
      -1294533, 1241117, -1070027, 1263503, -983816, 1529821, -1019586,
      1910421, -955420, 2073688, -836459, 2401105, -653905, 2690474, -731425,
      2930131, -935234, 3299500, -875978, 3523432, -878906, 3924822, -1081630,
      4561267, -1203023, 5105274, -1510983, 6052762, -2294646, 7021597,
      -3108053, 8826736, -4935222, 11678789, -8442713, 18725700, -21526692,
      25420577, 19589811, -28108666, 12634054, -14483066, 6263217, -9979706,
      3665661, -7909736, 2531530, -6434896, 1700772, -5525393, 1479473,
      -4894262, 1231760, -4353044, 1032940, -3786590, 941152, -3331614,
      665090, -2851619, 830696, -2762201, 958007, -2483118, 788233, -2184965,
      804825, -1967306, 1007255, -1862474, 920889, -1457506, 755406, -1405841,
      890230, -1302124, 1161599, -701867, 1154163, -1083366, 1204743, -513581,
      1547264, -650636, 1493384, -285543, 1771863, -277906, 1841343, -9078,
      1751863, 230222, 1819578, 207170, 1978972, 398137, 2106468, 552155,
      1997624, 685213, 2129520, 601078, 2238736, 944591, 2441879, 1194178,
      2355280, 986124, 2393328, 1049005, 2417944, 1208368, 2489516, 1352023,
      2572118, 1445283, 2856081, 1532997, 2742279, 1615877, 2915274, 1808036,
      2856871, 1806936, 3241747, 1622461, 2978558, 1841297, 3010378, 1923666,
      3271367, 2126700, 3070935, 1956958, 3107588, 2128405, 3288872, 2114911,
      3315952, 2406651, 3344038, 2370199, 3368980, 2144361, 3305030, 2183803,
      3401450, 2523102, 3405463, 2452475, 3463355, 2421678, 3551968, 2431949,
      3477251, 2148125, 3244489, 2174090};
    int32_t out_expected_2[kSamples]= {1691694, -2499988, -2035547,
      1060469, 988634, -2044502, -306271, 2041000, 201454, -2289456, 93694,
      2129427, -369152, -1887834, 860796, 2089102, -929424, -1673956, 1395291,
      1785651, -1619673, -1380109, 1963449, 1093311, -2111007, -840456,
      2372786, 578119, -2242702, 89774, 2463304, -132717, -2121480, 643634,
      2277636, -1125999, -1995858, 1543748, 2227861, -1483779, -1495491,
      2102642, 1833876, -1920568, -958378, 2485101, 772261, -2454257, -24942,
      2918714, 136838, -2500453, 816118, 3039735, -746560, -2365815, 1586396,
      2714951, -1511696, -1942334, 2571792, 2182827, -2325335, -1311543,
      3055970, 1367220, -2737182, -110626, 3889222, 631008, -3280879, 853066,
      4122279, -706638, -3334449, 2148311, 3993512, -1846301, -3004894,
      3426779, 3329522, -3165264, -2242423, 4756866, 2557711, -4131280,
      -805259, 5702711, 1120592, -4852821, 743664, 6476444, -621186, -5465828,
      2815787, 6768835, -3017442, -5338409, 5658126, 6838454, -5492288,
      -4682382, 8874947, 6153814, -8832561, -2649251, 12817398, 4237692,
      -13000247, 1190661, 18986363, -115738, -19693978, 9908367, 30660381,
      -10632635, -37962068, 47022884, 89744622, -42087632, 40279224,
      -88869341, -47542383, 38572364, 10441576, -30339718, -9926740, 19896578,
      28009, -18886612, -1124047, 13232498, -4150304, -12770551, 2637074,
      9051831, -6162211, -8713972, 4557937, 5489716, -6862312, -5532349,
      5415449, 2791310, -6999367, -2790102, 5375806, 546222, -6486452,
      -821261, 4994973, -1278840, -5645501, 1060484, 3996285, -2503954,
      -4653629, 2220549, 3036977, -3282133, -3318585, 2780636, 1789880,
      -4004589, -2041031, 3105373, 574819, -3992722, -971004, 3001703,
      -676739, -3841508, 417284, 2897970, -1427018, -3058480, 1189948,
      2210960, -2268992, -2603272, 1949785, 1576172, -2720404, -1891738,
      2309456, 769178, -2975646, -707150, 2424652, -88039, -2966660, -65452,
      2320780, -957557, -2798978, 744640, 1879794, -1672081, -2365319,
      1253309, 1366383, -2204082, -1544367, 1801452, 613828, -2531994,
      -983847, 2064842, 118326, -2613790, -203220, 2219635, -730341, -2641861,
      563557, 1765434, -1329916, -2272927, 1037138, 1266725, -1939220,
      -1588643, 1754528, 816552, -2376303, -1099167, 1864999, 122477,
      -2422762, -400027, 1889228, -579916, -2490353, 287139, 2011318,
      -1176657, -2502978, 812896, 1116502, -1940211};

    // Synthetic input. The first expression exceeds the int16_t range for
    // larger indices; the recorded reference values were presumably produced
    // with the resulting narrowed values, so the formula must stay as is.
    for (int idx = 0; idx < kSamples; ++idx) {
      data_in_1[idx] = idx * idx + 1777;
      data_in_2[idx] = WEBRTC_SPL_WORD16_MAX / (idx + 1) + 17;
    }

    Spec2TimeFunction(data_in_1, data_in_2, data_out_1, data_out_2);

    // Bit-exactness is not required of the ARM assembly code, hence the
    // tolerance instead of an equality check.
    for (int idx = 0; idx < kSamples; ++idx) {
      EXPECT_LE(abs(out_expected_1[idx] - data_out_1[idx]), kMaxAbsError);
      EXPECT_LE(abs(out_expected_2[idx] - data_out_2[idx]), kMaxAbsError);
    }
  }
};
// Runs the shared checker against the C implementation and, where NEON is
// available (detected at run time or assumed at build time), against the
// NEON assembly implementation as well.
TEST_F(TransformTest, Spec2TimeTest) {
  Spec2TimeTester(WebRtcIsacfix_Spec2TimeC);

#if defined(WEBRTC_DETECT_ARM_NEON)
  const bool neon_detected =
      (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0;
  if (neon_detected) {
    Spec2TimeTester(WebRtcIsacfix_Spec2TimeNeon);
  }
#elif defined(WEBRTC_ARCH_ARM_NEON)
  Spec2TimeTester(WebRtcIsacfix_Spec2TimeNeon);
#endif
}

View File

@@ -166,6 +166,7 @@
'../../codecs/isac/fix/source/filters_unittest.cc',
'../../codecs/isac/fix/source/filterbanks_unittest.cc',
'../../codecs/isac/fix/source/lpc_masking_model_unittest.cc',
'../../codecs/isac/fix/source/transform_unittest.cc',
],
}, # audio_coding_unittests
],