Optimized WebRtcIsacfix_Spec2Time() for iSAC-Fix on ARM NEON processors, doubling its speed.
Review URL: https://webrtc-codereview.appspot.com/930033 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3274 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -90,7 +90,8 @@ LOCAL_SRC_FILES := \ | ||||
|     filterbanks_neon.S \ | ||||
|     filters_neon.S \ | ||||
|     lattice_neon.S \ | ||||
|     lpc_masking_model_neon.S | ||||
|     lpc_masking_model_neon.S \ | ||||
|     transform_neon.S | ||||
|  | ||||
| # Flags passed to both C and C++ files. | ||||
| LOCAL_CFLAGS := \ | ||||
|   | ||||
| @@ -75,12 +75,23 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9, | ||||
|                              WebRtc_Word16 *outre, | ||||
|                              WebRtc_Word16 *outim); | ||||
| /* Common signature of the Spec2Time implementations (C and NEON). | ||||
|  * WebRtcIsacfix_Spec2Time points at the implementation selected at init. */ | ||||
| typedef void (*Spec2Time)(WebRtc_Word16* inreQ7, | ||||
|                           WebRtc_Word16* inimQ7, | ||||
|                           WebRtc_Word32* outre1Q16, | ||||
|                           WebRtc_Word32* outre2Q16); | ||||
| extern Spec2Time WebRtcIsacfix_Spec2Time; | ||||
|  | ||||
| void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16* inreQ7, | ||||
|                               WebRtc_Word16* inimQ7, | ||||
|                               WebRtc_Word32* outre1Q16, | ||||
|                               WebRtc_Word32* outre2Q16); | ||||
|  | ||||
| void WebRtcIsacfix_Spec2Time(WebRtc_Word16 *inreQ7, | ||||
|                              WebRtc_Word16 *inimQ7, | ||||
|                              WebRtc_Word32 *outre1Q16, | ||||
|                              WebRtc_Word32 *outre2Q16); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16* inreQ7, | ||||
|                                  WebRtc_Word16* inimQ7, | ||||
|                                  WebRtc_Word32* outre1Q16, | ||||
|                                  WebRtc_Word32* outre2Q16); | ||||
| #endif | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -182,6 +182,7 @@ WebRtc_Word16 WebRtcIsacfix_FreeInternal(ISACFIX_MainStruct *ISAC_main_inst) | ||||
| static void WebRtcIsacfix_InitNeon(void) { | ||||
|   WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrNeon; | ||||
|   WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopNeon; | ||||
|   WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeNeon; | ||||
|   WebRtcIsacfix_CalculateResidualEnergy = | ||||
|       WebRtcIsacfix_CalculateResidualEnergyNeon; | ||||
|   WebRtcIsacfix_AllpassFilter2FixDec16 = | ||||
| @@ -274,6 +275,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst, | ||||
|       WebRtcIsacfix_CalculateResidualEnergyC; | ||||
|   WebRtcIsacfix_AllpassFilter2FixDec16 = | ||||
|       WebRtcIsacfix_AllpassFilter2FixDec16C; | ||||
|   WebRtcIsacfix_Spec2Time = | ||||
|       WebRtcIsacfix_Spec2TimeC; | ||||
|  | ||||
| #ifdef WEBRTC_DETECT_ARM_NEON | ||||
|   if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { | ||||
|   | ||||
| @@ -101,6 +101,7 @@ | ||||
|             'filters_neon.S', | ||||
|             'lattice_neon.S', | ||||
|             'lpc_masking_model_neon.S', | ||||
|             'transform_neon.S', | ||||
|           ], | ||||
|         }, | ||||
|       ], | ||||
|   | ||||
| @@ -15,13 +15,14 @@ | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| #include "fft.h" | ||||
| #include "codec.h" | ||||
| #include "settings.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h" | ||||
|  | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" | ||||
|  | ||||
| /* Cosine table 1 in Q14 */ | ||||
| static const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { | ||||
| const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { | ||||
|   16384,  16383,  16378,  16371,  16362,  16349,  16333,  16315,  16294,  16270, | ||||
|   16244,  16214,  16182,  16147,  16110,  16069,  16026,  15980,  15931,  15880, | ||||
|   15826,  15769,  15709,  15647,  15582,  15515,  15444,  15371,  15296,  15218, | ||||
| @@ -50,7 +51,7 @@ static const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { | ||||
|  | ||||
|  | ||||
| /* Sine table 1 in Q14 */ | ||||
| static const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { | ||||
| const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { | ||||
|   0,   214,   429,   643,   857,  1072,  1285,  1499,  1713,  1926, | ||||
|   2139,  2351,  2563,  2775,  2986,  3196,  3406,  3616,  3825,  4033, | ||||
|   4240,  4447,  4653,  4859,  5063,  5266,  5469,  5671,  5872,  6071, | ||||
| @@ -79,7 +80,7 @@ static const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { | ||||
|  | ||||
|  | ||||
| /* Cosine table 2 in Q14 */ | ||||
| static const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = { | ||||
| const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = { | ||||
|   107,   -322,   536,   -750,   965,  -1179,  1392,  -1606,  1819,  -2032, | ||||
|   2245,  -2457,  2669,  -2880,  3091,  -3301,  3511,  -3720,  3929,  -4137, | ||||
|   4344,  -4550,  4756,  -4961,  5165,  -5368,  5570,  -5771,  5971,  -6171, | ||||
| @@ -96,7 +97,7 @@ static const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = { | ||||
|  | ||||
|  | ||||
| /* Sine table 2 in Q14 */ | ||||
| static const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { | ||||
| const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { | ||||
|   16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257, | ||||
|   16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853, | ||||
|   15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178, | ||||
| @@ -111,7 +112,8 @@ static const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { | ||||
|   2032,  -1819,  1606,  -1392,  1179,   -965,   750,   -536,   322,   -107 | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Function pointer to the active Spec2Time implementation (C or NEON); set at init. | ||||
| Spec2Time WebRtcIsacfix_Spec2Time; | ||||
|  | ||||
| void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9, | ||||
|                              WebRtc_Word16 *inre2Q9, | ||||
| @@ -200,7 +202,7 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9, | ||||
| } | ||||
|  | ||||
|  | ||||
| void WebRtcIsacfix_Spec2Time(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebRtc_Word32 *outre1Q16, WebRtc_Word32 *outre2Q16) | ||||
| void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebRtc_Word32 *outre1Q16, WebRtc_Word32 *outre2Q16) | ||||
| { | ||||
|  | ||||
|   int k; | ||||
|   | ||||
| @@ -0,0 +1,37 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| /* Cosine table 1 in Q14 (FRAMESAMPLES/2 entries); also read by the NEON assembly. */ | ||||
| extern const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2]; | ||||
|  | ||||
| /* Sine table 1 in Q14 (FRAMESAMPLES/2 entries); also read by the NEON assembly. */ | ||||
| extern const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2]; | ||||
|  | ||||
| /* Cosine table 2 in Q14 (FRAMESAMPLES/4 entries); also read by the NEON assembly. */ | ||||
| extern const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4]; | ||||
|  | ||||
| /* Sine table 2 in Q14 (FRAMESAMPLES/4 entries); also read by the NEON assembly. */ | ||||
| extern const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4]; | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } /* extern "C" */ | ||||
| #endif | ||||
|  | ||||
| #endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ */ | ||||
| @@ -0,0 +1,382 @@ | ||||
| @ | ||||
| @ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
| @ | ||||
| @ Use of this source code is governed by a BSD-style license | ||||
| @ that can be found in the LICENSE file in the root of the source | ||||
| @ tree. An additional intellectual property rights grant can be found | ||||
| @ in the file PATENTS.  All contributing project authors may | ||||
| @ be found in the AUTHORS file in the root of the source tree. | ||||
| @ | ||||
| @ Reference code is in transform.c. The output is not bit-exact with the C | ||||
| @ code because rounding differs between the C code and the ARM instructions, | ||||
| @ but the quality of the assembly version is not worse. | ||||
|  | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" | ||||
| #include "webrtc/system_wrappers/interface/asm_defines.h" | ||||
|  | ||||
| GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon | ||||
|  | ||||
| @ void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16 *inreQ7, | ||||
| @                                  WebRtc_Word16 *inimQ7, | ||||
| @                                  WebRtc_Word32 *outre1Q16, | ||||
| @                                  WebRtc_Word32 *outre2Q16); | ||||
|  | ||||
| DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon | ||||
| .align  2 | ||||
|   push {r4-r11,lr} | ||||
|   vpush {q4-q7} | ||||
|   sub sp, sp, #16 | ||||
|   str r0, [sp]                @ inreQ7 | ||||
|   str r1, [sp, #4]            @ inimQ7 | ||||
|   str r2, [sp, #8]            @ outre1Q16 | ||||
|   str r3, [sp, #12]           @ outre2Q16 | ||||
|  | ||||
|   mov r8, #(FRAMESAMPLES - 16) | ||||
|   add r12, r0, r8             @ &inreQ7[FRAMESAMPLES/2 - 8] | ||||
|   add r11, r1, r8             @ &inimQ7[FRAMESAMPLES/2 - 8] | ||||
|   add r4, r2, r8, lsl #1      @ &outRe1Q16[FRAMESAMPLES/2 - 8] | ||||
|   add r6, r3, r8, lsl #1      @ &outRe2Q16[FRAMESAMPLES/2 - 8] | ||||
|  | ||||
|   mov r8, #(FRAMESAMPLES / 2) @ loop counter | ||||
|   ldr r9, =kCosTab2 | ||||
|   ldr r10, =kSinTab2 | ||||
|   mov r5, #-32 | ||||
|   mov r7, #-16 | ||||
|   vmov.u32 q6, #0             @ Initialize the maximum values for tmpInIm. | ||||
|   vmov.u32 q7, #0             @ Initialize the maximum values for tmpInRe. | ||||
|  | ||||
| TRANSFORM_AND_FIND_MAX: | ||||
| @ Use ">> 5" instead of "<< 9" followed by ">> 14" as in the C code (bit-exact). | ||||
| @ "mirrored" below means the reversed tail samples, index FRAMESAMPLES/2 - 1 - k. | ||||
|  | ||||
|   vld1.16 {q0}, [r9]!         @ kCosTab2[] | ||||
|   vld1.16 {q1}, [r10]!        @ kSinTab2[] | ||||
|   vld1.16 {q2}, [r0]!         @ inreQ7[] | ||||
|   vld1.16 {q3}, [r1]!         @ inimQ7[] | ||||
|  | ||||
|   vmull.s16 q8, d0, d4        @ kCosTab2[k] x inreQ7[k] | ||||
|   vmull.s16 q9, d1, d5        @ kCosTab2[k] x inreQ7[k] | ||||
|   vmull.s16 q10, d2, d6       @ kSinTab2[k] x inimQ7[k] | ||||
|   vmull.s16 q11, d3, d7       @ kSinTab2[k] x inimQ7[k] | ||||
|   vmull.s16 q12, d0, d6       @ kCosTab2[k] x inimQ7[k] | ||||
|   vmull.s16 q13, d1, d7       @ kCosTab2[k] x inimQ7[k] | ||||
|   vmull.s16 q14, d2, d4       @ kSinTab2[k] x inreQ7[k] | ||||
|   vmull.s16 q15, d3, d5       @ kSinTab2[k] x inreQ7[k] | ||||
|  | ||||
|   vld1.16 {q2}, [r11], r7     @ inimQ7[] tail block; r11 then steps back 16 bytes | ||||
|   vld1.16 {q3}, [r12], r7     @ inreQ7[] tail block; r12 then steps back 16 bytes | ||||
|  | ||||
|   vadd.s32 q8, q8, q10 | ||||
|   vadd.s32 q9, q9, q11 | ||||
|   vsub.s32 q12, q12, q14 | ||||
|   vsub.s32 q13, q13, q15 | ||||
|  | ||||
|   subs r8, #16 | ||||
|  | ||||
|   vrev64.16 q2, q2            @ Reverse the samples within each 64-bit half | ||||
|   vrev64.16 q3, q3            @ Reverse the samples within each 64-bit half | ||||
|  | ||||
|   vshr.s32 q8, q8, #5         @ xrQ16 | ||||
|   vshr.s32 q9, q9, #5         @ xrQ16 | ||||
|   vshr.s32 q12, q12, #5       @ xiQ16 | ||||
|   vshr.s32 q13, q13, #5       @ xiQ16 | ||||
|  | ||||
|   vmull.s16 q10, d0, d7       @ kCosTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|   vmull.s16 q11, d1, d6       @ kCosTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|   vmull.s16 q14, d2, d5       @ kSinTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|   vmull.s16 q15, d3, d4       @ kSinTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|  | ||||
|   vmull.s16 q4, d0, d5        @ kCosTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|   vmull.s16 q5, d1, d4        @ kCosTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|   vmull.s16 q0, d2, d7        @ kSinTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|   vmull.s16 q2, d3, d6        @ kSinTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] | ||||
|  | ||||
|   vsub.s32 q14, q14, q10      @ sin * mirrored inimQ7 - cos * mirrored inreQ7 (yiQ16 before shift) | ||||
|   vsub.s32 q15, q15, q11      @ sin * mirrored inimQ7 - cos * mirrored inreQ7 (yiQ16 before shift) | ||||
|   vadd.s32 q10, q4, q0        @ cos * mirrored inimQ7 + sin * mirrored inreQ7 (negated below -> yrQ16) | ||||
|   vadd.s32 q11, q5, q2        @ cos * mirrored inimQ7 + sin * mirrored inreQ7 (negated below -> yrQ16) | ||||
|  | ||||
|   vshr.s32 q14, q14, #5       @ yiQ16 | ||||
|   vshr.s32 q15, q15, #5       @ yiQ16 | ||||
|  | ||||
|   vneg.s32 q10, q10 | ||||
|   vneg.s32 q11, q11 | ||||
|  | ||||
|   @ xrQ16 - yiQ16 | ||||
|   vsub.s32 q0, q8, q14 | ||||
|   vsub.s32 q1, q9, q15 | ||||
|  | ||||
|   vshr.s32 q10, q10, #5       @ yrQ16 | ||||
|   vshr.s32 q11, q11, #5       @ yrQ16 | ||||
|  | ||||
|   @ xrQ16 + yiQ16 | ||||
|   vadd.s32 q3, q8, q14 | ||||
|   vadd.s32 q2, q9, q15 | ||||
|  | ||||
|   @ yrQ16 + xiQ16 | ||||
|   vadd.s32 q4, q10, q12 | ||||
|   vadd.s32 q5, q11, q13 | ||||
|  | ||||
|   @ yrQ16 - xiQ16 | ||||
|   vsub.s32 q9, q10, q12 | ||||
|   vsub.s32 q8, q11, q13 | ||||
|  | ||||
|   @ Reverse the order of the samples | ||||
|   vrev64.32 q2, q2 | ||||
|   vrev64.32 q3, q3 | ||||
|   vrev64.32 q8, q8 | ||||
|   vrev64.32 q9, q9 | ||||
|   vswp d4, d5 | ||||
|   vswp d6, d7 | ||||
|   vswp d16, d17 | ||||
|   vswp d18, d19 | ||||
|  | ||||
|   vst1.32  {q0, q1}, [r2]!    @ outre1Q16[k] | ||||
|   vst1.32  {q2, q3}, [r4], r5 @ outre1Q16[FRAMESAMPLES/2 - 1 - k] | ||||
|   vst1.32  {q4, q5}, [r3]!    @ outre2Q16[k] | ||||
|   vst1.32  {q8, q9}, [r6], r5 @ outre2Q16[FRAMESAMPLES/2 - 1 - k] | ||||
|  | ||||
|   @ Find the absolute maximum in the vectors and store them in q6 and q7. | ||||
|   vabs.s32 q10, q0 | ||||
|   vabs.s32 q11, q1 | ||||
|   vabs.s32 q12, q2 | ||||
|   vabs.s32 q13, q3 | ||||
|   vabs.s32 q14, q4 | ||||
|   vmax.u32 q6, q10             @ Use u32 so we don't lose the value 0x80000000. | ||||
|   vmax.u32 q7, q14             @ Maximum for outre2Q16[]. | ||||
|   vabs.s32 q15, q5 | ||||
|   vmax.u32 q6, q11             @ Maximum for outre1Q16[]. | ||||
|   vmax.u32 q7, q15 | ||||
|   vabs.s32 q0, q8 | ||||
|   vmax.u32 q6, q12 | ||||
|   vmax.u32 q7, q0 | ||||
|   vabs.s32 q1, q9 | ||||
|   vmax.u32 q6, q13 | ||||
|   vmax.u32 q7, q1 | ||||
|  | ||||
|   bgt TRANSFORM_AND_FIND_MAX | ||||
|  | ||||
|   @ Find the maximum value in the Neon registers | ||||
|   vmax.u32 d12, d13 | ||||
|   vmax.u32 d14, d15 | ||||
|   vpmax.u32 d12, d12, d12     @ Both 32 bits words hold the same value tmpInIm. | ||||
|   vpmax.u32 d14, d14, d14     @ Both 32 bits words hold the same value tmpInRe. | ||||
|   vmax.s32 d14, d12, d14      @ if (tmpInIm>tmpInRe) tmpInRe = tmpInIm; | ||||
|  | ||||
|   ldr r4, [sp]                @ inreQ7 | ||||
|   vcls.s32  d15, d14          @ sh = WebRtcSpl_NormW32(tmpInRe); | ||||
|   ldr r5, [sp, #4]            @ inimQ7 | ||||
|   vmov.i32  d14, #24          @ sh = sh-24; | ||||
|   ldr r6, [sp, #8]            @ outre1Q16 | ||||
|   vsub.s32  d15, d15, d14 | ||||
|   ldr r7, [sp, #12]           @ outre2Q16 | ||||
|   vdup.s32  q8, d15[0]        @ sh | ||||
|  | ||||
|   mov r8, #(FRAMESAMPLES / 2) | ||||
|  | ||||
| PRE_FFT_SHIFT: | ||||
|   vld1.32 {q0, q1}, [r6]!     @ outre1Q16[] | ||||
|   vld1.32 {q2, q3}, [r6]!     @ outre1Q16[] | ||||
|   vld1.32 {q4, q5}, [r7]!     @ outre2Q16[] | ||||
|   vld1.32 {q6, q7}, [r7]!     @ outre2Q16[] | ||||
|  | ||||
|   subs r8, #16 | ||||
|  | ||||
|   vrshl.s32 q0, q0, q8 | ||||
|   vrshl.s32 q1, q1, q8 | ||||
|   vrshl.s32 q2, q2, q8 | ||||
|   vrshl.s32 q3, q3, q8 | ||||
|   vrshl.s32 q4, q4, q8 | ||||
|   vrshl.s32 q5, q5, q8 | ||||
|   vrshl.s32 q6, q6, q8 | ||||
|   vrshl.s32 q7, q7, q8 | ||||
|  | ||||
|   vmovn.s32 d0, q0 | ||||
|   vmovn.s32 d1, q1 | ||||
|   vmovn.s32 d2, q2 | ||||
|   vmovn.s32 d3, q3 | ||||
|   vmovn.s32 d4, q4 | ||||
|   vmovn.s32 d5, q5 | ||||
|   vmovn.s32 d6, q6 | ||||
|   vmovn.s32 d7, q7 | ||||
|  | ||||
|   vst1.16 {q0, q1}, [r4]!     @ inreQ7[] | ||||
|   vst1.16 {q2, q3}, [r5]!     @ inimQ7[] | ||||
|  | ||||
|   bgt PRE_FFT_SHIFT | ||||
|  | ||||
|   ldr r0, [sp]                @ inreQ7 | ||||
|   ldr r1, [sp, #4]            @ inimQ7 | ||||
|   mov r2, #1 | ||||
|   bl  WebRtcIsacfix_FftRadix16Fastest(PLT) | ||||
|  | ||||
|   ldr r4, [sp]                @ inreQ7 | ||||
|   ldr r5, [sp, #4]            @ inimQ7 | ||||
|   ldr r6, [sp, #8]            @ outre1Q16 | ||||
|   ldr r7, [sp, #12]           @ outre2Q16 | ||||
|   mov r8, #(FRAMESAMPLES / 2) | ||||
|   vneg.s32 q5, q8             @ -sh | ||||
|   movw r0, #273 | ||||
|   vdup.s32 d8, r0 | ||||
|  | ||||
| POST_FFT_SHIFT_DIVIDE: | ||||
|   vld1.16 {q0, q1}, [r4]!     @ inreQ7 | ||||
|   vld1.16 {q2, q3}, [r5]!     @ inimQ7 | ||||
|  | ||||
|   subs r8, #16 | ||||
|  | ||||
|   vmovl.s16 q6, d0 | ||||
|   vmovl.s16 q7, d1 | ||||
|   vmovl.s16 q8, d2 | ||||
|   vmovl.s16 q9, d3 | ||||
|   vmovl.s16 q0, d4 | ||||
|   vmovl.s16 q1, d5 | ||||
|   vmovl.s16 q2, d6 | ||||
|   vmovl.s16 q3, d7 | ||||
|  | ||||
|   vshl.s32 q6, q6, q5 | ||||
|   vshl.s32 q7, q7, q5 | ||||
|   vshl.s32 q8, q8, q5 | ||||
|   vshl.s32 q9, q9, q5 | ||||
|   vshl.s32 q0, q0, q5 | ||||
|   vshl.s32 q1, q1, q5 | ||||
|   vshl.s32 q2, q2, q5 | ||||
|   vshl.s32 q3, q3, q5 | ||||
|  | ||||
|   @ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre1Q16[k]) | ||||
|   vmull.s32 q10, d12, d8 | ||||
|   vmull.s32 q11, d13, d8 | ||||
|   vmull.s32 q12, d14, d8 | ||||
|   vmull.s32 q13, d15, d8 | ||||
|   vshrn.s64 d12, q10, #16 | ||||
|   vshrn.s64 d13, q11, #16 | ||||
|   vshrn.s64 d14, q12, #16 | ||||
|   vshrn.s64 d15, q13, #16 | ||||
|  | ||||
|   vmull.s32 q10, d16, d8 | ||||
|   vmull.s32 q11, d17, d8 | ||||
|   vmull.s32 q12, d18, d8 | ||||
|   vmull.s32 q13, d19, d8 | ||||
|   vshrn.s64 d16, q10, #16 | ||||
|   vshrn.s64 d17, q11, #16 | ||||
|   vshrn.s64 d18, q12, #16 | ||||
|   vshrn.s64 d19, q13, #16 | ||||
|  | ||||
|   @ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre2Q16[k]) | ||||
|   vmull.s32 q10, d0, d8 | ||||
|   vmull.s32 q11, d1, d8 | ||||
|   vmull.s32 q12, d2, d8 | ||||
|   vmull.s32 q13, d3, d8 | ||||
|   vshrn.s64 d0, q10, #16 | ||||
|   vshrn.s64 d1, q11, #16 | ||||
|   vshrn.s64 d2, q12, #16 | ||||
|   vshrn.s64 d3, q13, #16 | ||||
|  | ||||
|   vmull.s32 q10, d4, d8 | ||||
|   vmull.s32 q11, d5, d8 | ||||
|   vmull.s32 q12, d6, d8 | ||||
|   vmull.s32 q13, d7, d8 | ||||
|   vshrn.s64 d4, q10, #16 | ||||
|   vshrn.s64 d5, q11, #16 | ||||
|   vshrn.s64 d6, q12, #16 | ||||
|   vshrn.s64 d7, q13, #16 | ||||
|  | ||||
|   vst1.32 {q6, q7}, [r6]!     @ outre1Q16[] | ||||
|   vst1.32 {q8, q9}, [r6]!     @ outre1Q16[] | ||||
|   vst1.32 {q0, q1}, [r7]!     @ outre2Q16[] | ||||
|   vst1.32 {q2, q3}, [r7]!     @ outre2Q16[] | ||||
|  | ||||
|   bgt POST_FFT_SHIFT_DIVIDE | ||||
|  | ||||
|   mov r8, #(FRAMESAMPLES / 2) | ||||
|   ldr r9, =kCosTab1 | ||||
|   ldr r10, =kSinTab1 | ||||
|   ldr r2, [sp, #8]            @ outre1Q16 | ||||
|   ldr r3, [sp, #12]           @ outre2Q16 | ||||
|   movw r0, #31727 | ||||
|  | ||||
| DEMODULATE_AND_SEPARATE: | ||||
|   vld1.16 {q0}, [r9]!         @ kCosTab1[] | ||||
|   vld1.16 {q1}, [r10]!        @ kSinTab1[] | ||||
|   vld1.32 {q2, q3}, [r2]      @ outre1Q16 | ||||
|   vld1.32 {q4, q5}, [r3]      @ outre2Q16 | ||||
|  | ||||
|   vmovl.s16 q6, d0            @ kCosTab1[] | ||||
|   vmovl.s16 q7, d1            @ kCosTab1[] | ||||
|   vmovl.s16 q8, d2            @ kSinTab1[] | ||||
|   vmovl.s16 q9, d3            @ kSinTab1[] | ||||
|  | ||||
|   vmull.s32 q10, d12, d4      @ kCosTab1[k] * outre1Q16[k] | ||||
|   vmull.s32 q11, d13, d5      @ kCosTab1[k] * outre1Q16[k] | ||||
|   vmull.s32 q12, d14, d6      @ kCosTab1[k] * outre1Q16[k] | ||||
|   vmull.s32 q13, d15, d7      @ kCosTab1[k] * outre1Q16[k] | ||||
|  | ||||
|   vmull.s32 q0, d16, d8       @ kSinTab1[k] * outre2Q16[k] | ||||
|   vmull.s32 q1, d17, d9       @ kSinTab1[k] * outre2Q16[k] | ||||
|   vmull.s32 q14, d18, d10     @ kSinTab1[k] * outre2Q16[k] | ||||
|   vmull.s32 q15, d19, d11     @ kSinTab1[k] * outre2Q16[k] | ||||
|  | ||||
|   vsub.s64 q10, q10, q0 | ||||
|   vsub.s64 q11, q11, q1 | ||||
|   vsub.s64 q12, q12, q14 | ||||
|   vsub.s64 q13, q13, q15 | ||||
|  | ||||
|   vrshrn.s64 d20, q10, #14    @ xrQ16 | ||||
|   vrshrn.s64 d21, q11, #14    @ xrQ16 | ||||
|   vrshrn.s64 d22, q12, #14    @ xrQ16 | ||||
|   vrshrn.s64 d23, q13, #14    @ xrQ16 | ||||
|  | ||||
|   subs r8, #8 | ||||
|  | ||||
|   vmull.s32 q12, d12, d8      @ kCosTab1[k] * outre2Q16[k] | ||||
|   vmull.s32 q13, d13, d9      @ kCosTab1[k] * outre2Q16[k] | ||||
|   vmull.s32 q14, d14, d10     @ kCosTab1[k] * outre2Q16[k] | ||||
|   vmull.s32 q15, d15, d11     @ kCosTab1[k] * outre2Q16[k] | ||||
|  | ||||
|   vdup.s32 d9, r0             @ generic -> Neon doesn't cost extra cycles. | ||||
|  | ||||
|   vmull.s32 q0, d16, d4       @ kSinTab1[k] * outre1Q16[k] | ||||
|   vmull.s32 q1, d17, d5       @ kSinTab1[k] * outre1Q16[k] | ||||
|   vmull.s32 q6, d18, d6       @ kSinTab1[k] * outre1Q16[k] | ||||
|   vmull.s32 q7, d19, d7       @ kSinTab1[k] * outre1Q16[k] | ||||
|  | ||||
|   vadd.s64 q12, q12, q0 | ||||
|   vadd.s64 q13, q13, q1 | ||||
|   vadd.s64 q14, q14, q6 | ||||
|   vadd.s64 q15, q15, q7 | ||||
|  | ||||
|   vrshrn.s64 d24, q12, #14    @ xiQ16 | ||||
|   vrshrn.s64 d25, q13, #14    @ xiQ16 | ||||
|   vrshrn.s64 d26, q14, #14    @ xiQ16 | ||||
|   vrshrn.s64 d27, q15, #14    @ xiQ16 | ||||
|  | ||||
|   @ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16) | ||||
|   vmull.s32 q0, d20, d9 | ||||
|   vmull.s32 q1, d21, d9 | ||||
|   vmull.s32 q2, d22, d9 | ||||
|   vmull.s32 q3, d23, d9 | ||||
|  | ||||
|   vrshrn.s64 d0, q0, #11 | ||||
|   vrshrn.s64 d1, q1, #11 | ||||
|   vrshrn.s64 d2, q2, #11 | ||||
|   vrshrn.s64 d3, q3, #11 | ||||
|  | ||||
|   @ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xiQ16) | ||||
|   vmull.s32 q6, d24, d9 | ||||
|   vmull.s32 q7, d25, d9 | ||||
|   vmull.s32 q8, d26, d9 | ||||
|   vmull.s32 q9, d27, d9 | ||||
|  | ||||
|   vrshrn.s64 d4, q6, #11 | ||||
|   vrshrn.s64 d5, q7, #11 | ||||
|   vrshrn.s64 d6, q8, #11 | ||||
|   vrshrn.s64 d7, q9, #11 | ||||
|  | ||||
|   vst1.16 {q0, q1}, [r2]!     @ outre1Q16[] | ||||
|   vst1.16 {q2, q3}, [r3]!     @ outre2Q16[] | ||||
|  | ||||
|   bgt DEMODULATE_AND_SEPARATE | ||||
|  | ||||
|   add sp, sp, #16 | ||||
|   vpop {q4-q7} | ||||
|   pop {r4-r11,pc} | ||||
| @@ -0,0 +1,121 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
| #include "gtest/gtest.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" | ||||
| #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" | ||||
|  | ||||
| class TransformTest : public testing::Test { | ||||
|  protected: | ||||
|    TransformTest() { | ||||
|      WebRtcSpl_Init(); | ||||
|    } | ||||
|  | ||||
|   // Pass a function pointer to the Tester function. | ||||
|   void Spec2TimeTester(Spec2Time Spec2TimeFunction) { | ||||
|     // The WebRtcIsacfix_Spec2Time functions hard-code the buffer lengths, so | ||||
|     // the test has to use these large buffers. | ||||
|     const int kSamples = FRAMESAMPLES/2; | ||||
|     int16_t data_in_1[kSamples] = {0}; | ||||
|     int16_t data_in_2[kSamples] = {0}; | ||||
|     int32_t data_out_1[kSamples] = {0}; | ||||
|     int32_t data_out_2[kSamples] = {0}; | ||||
|     int32_t out_expected_1[kSamples]= {-3366470, -2285227, -3415765, | ||||
|         -2310215, -3118030, -2222470, -3030254, -2192091, -3423170, -2216041, | ||||
|         -3305541, -2171936, -3195767, -2095779, -3153304, -2157560, -3071167, | ||||
|         -2032108, -3101190, -1972016, -3103824, -2089118, -3139811, -1898337, | ||||
|         -3102801, -2055082, -3029665, -1854140, -2962586, -1966454, -3071167, | ||||
|         -1894588, -2851743, -1917315, -2848087, -1594932, -2799242, -1462184, | ||||
|         -2845887, -1437599, -2691776, -1329637, -2770659, -1268491, -2625161, | ||||
|         -1578991, -2460299, -1186385, -2365613, -1039354, -2322608, -958518, | ||||
|         -2271749, -789860, -2254538, -850308, -2384436, -850959, -2133734, | ||||
|         -587678, -2093316, -495115, -1973364, -475177, -1801282, -173507, | ||||
|         -1848516, -158015, -1792018, -62648, -1643313, 214746, -1500758, 267077, | ||||
|         -1450193, 560521, -1521579, 675283, -1345408, 857559, -1300822, 1116332, | ||||
|         -1294533, 1241117, -1070027, 1263503, -983816, 1529821, -1019586, | ||||
|         1910421, -955420, 2073688, -836459, 2401105, -653905, 2690474, -731425, | ||||
|         2930131, -935234, 3299500, -875978, 3523432, -878906, 3924822, -1081630, | ||||
|         4561267, -1203023, 5105274, -1510983, 6052762, -2294646, 7021597, | ||||
|         -3108053, 8826736, -4935222, 11678789, -8442713, 18725700, -21526692, | ||||
|         25420577, 19589811, -28108666, 12634054, -14483066, 6263217, -9979706, | ||||
|         3665661, -7909736, 2531530, -6434896, 1700772, -5525393, 1479473, | ||||
|         -4894262, 1231760, -4353044, 1032940, -3786590, 941152, -3331614, | ||||
|         665090, -2851619, 830696, -2762201, 958007, -2483118, 788233, -2184965, | ||||
|         804825, -1967306, 1007255, -1862474, 920889, -1457506, 755406, -1405841, | ||||
|         890230, -1302124, 1161599, -701867, 1154163, -1083366, 1204743, -513581, | ||||
|         1547264, -650636, 1493384, -285543, 1771863, -277906, 1841343, -9078, | ||||
|         1751863, 230222, 1819578, 207170, 1978972, 398137, 2106468, 552155, | ||||
|         1997624, 685213, 2129520, 601078, 2238736, 944591, 2441879, 1194178, | ||||
|         2355280, 986124, 2393328, 1049005, 2417944, 1208368, 2489516, 1352023, | ||||
|         2572118, 1445283, 2856081, 1532997, 2742279, 1615877, 2915274, 1808036, | ||||
|         2856871, 1806936, 3241747, 1622461, 2978558, 1841297, 3010378, 1923666, | ||||
|         3271367, 2126700, 3070935, 1956958, 3107588, 2128405, 3288872, 2114911, | ||||
|         3315952, 2406651, 3344038, 2370199, 3368980, 2144361, 3305030, 2183803, | ||||
|         3401450, 2523102, 3405463, 2452475, 3463355, 2421678, 3551968, 2431949, | ||||
|         3477251, 2148125, 3244489, 2174090}; | ||||
|     int32_t out_expected_2[kSamples]= {1691694, -2499988, -2035547, | ||||
|         1060469, 988634, -2044502, -306271, 2041000, 201454, -2289456, 93694, | ||||
|         2129427, -369152, -1887834, 860796, 2089102, -929424, -1673956, 1395291, | ||||
|         1785651, -1619673, -1380109, 1963449, 1093311, -2111007, -840456, | ||||
|         2372786, 578119, -2242702, 89774, 2463304, -132717, -2121480, 643634, | ||||
|         2277636, -1125999, -1995858, 1543748, 2227861, -1483779, -1495491, | ||||
|         2102642, 1833876, -1920568, -958378, 2485101, 772261, -2454257, -24942, | ||||
|         2918714, 136838, -2500453, 816118, 3039735, -746560, -2365815, 1586396, | ||||
|         2714951, -1511696, -1942334, 2571792, 2182827, -2325335, -1311543, | ||||
|         3055970, 1367220, -2737182, -110626, 3889222, 631008, -3280879, 853066, | ||||
|         4122279, -706638, -3334449, 2148311, 3993512, -1846301, -3004894, | ||||
|         3426779, 3329522, -3165264, -2242423, 4756866, 2557711, -4131280, | ||||
|         -805259, 5702711, 1120592, -4852821, 743664, 6476444, -621186, -5465828, | ||||
|         2815787, 6768835, -3017442, -5338409, 5658126, 6838454, -5492288, | ||||
|         -4682382, 8874947, 6153814, -8832561, -2649251, 12817398, 4237692, | ||||
|         -13000247, 1190661, 18986363, -115738, -19693978, 9908367, 30660381, | ||||
|         -10632635, -37962068, 47022884, 89744622, -42087632, 40279224, | ||||
|         -88869341, -47542383, 38572364, 10441576, -30339718, -9926740, 19896578, | ||||
|         28009, -18886612, -1124047, 13232498, -4150304, -12770551, 2637074, | ||||
|         9051831, -6162211, -8713972, 4557937, 5489716, -6862312, -5532349, | ||||
|         5415449, 2791310, -6999367, -2790102, 5375806, 546222, -6486452, | ||||
|         -821261, 4994973, -1278840, -5645501, 1060484, 3996285, -2503954, | ||||
|         -4653629, 2220549, 3036977, -3282133, -3318585, 2780636, 1789880, | ||||
|         -4004589, -2041031, 3105373, 574819, -3992722, -971004, 3001703, | ||||
|         -676739, -3841508, 417284, 2897970, -1427018, -3058480, 1189948, | ||||
|         2210960, -2268992, -2603272, 1949785, 1576172, -2720404, -1891738, | ||||
|         2309456, 769178, -2975646, -707150, 2424652, -88039, -2966660, -65452, | ||||
|         2320780, -957557, -2798978, 744640, 1879794, -1672081, -2365319, | ||||
|         1253309, 1366383, -2204082, -1544367, 1801452, 613828, -2531994, | ||||
|         -983847, 2064842, 118326, -2613790, -203220, 2219635, -730341, -2641861, | ||||
|         563557, 1765434, -1329916, -2272927, 1037138, 1266725, -1939220, | ||||
|         -1588643, 1754528, 816552, -2376303, -1099167, 1864999, 122477, | ||||
|         -2422762, -400027, 1889228, -579916, -2490353, 287139, 2011318, | ||||
|         -1176657, -2502978, 812896, 1116502, -1940211}; | ||||
|  | ||||
|     for(int i = 0; i < kSamples; i++) { | ||||
|       data_in_1[i] = i * i + 1777; | ||||
|       data_in_2[i] = WEBRTC_SPL_WORD16_MAX / (i + 1) + 17; | ||||
|     } | ||||
|  | ||||
|     Spec2TimeFunction(data_in_1, data_in_2, data_out_1, data_out_2); | ||||
|  | ||||
|     for (int i = 0; i < kSamples; i++) { | ||||
|       // Bit-exactness is not required for the ARM assembly code; allow a small error. | ||||
|       EXPECT_LE(abs(out_expected_1[i] - data_out_1[i]), 16); | ||||
|       EXPECT_LE(abs(out_expected_2[i] - data_out_2[i]), 16); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| TEST_F(TransformTest, Spec2TimeTest) { | ||||
|   Spec2TimeTester(WebRtcIsacfix_Spec2TimeC); | ||||
| #ifdef WEBRTC_DETECT_ARM_NEON | ||||
|   if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { | ||||
|     Spec2TimeTester(WebRtcIsacfix_Spec2TimeNeon); | ||||
|   } | ||||
| #elif defined(WEBRTC_ARCH_ARM_NEON) | ||||
|   Spec2TimeTester(WebRtcIsacfix_Spec2TimeNeon); | ||||
| #endif | ||||
| } | ||||
| @@ -166,6 +166,7 @@ | ||||
|              '../../codecs/isac/fix/source/filters_unittest.cc', | ||||
|              '../../codecs/isac/fix/source/filterbanks_unittest.cc', | ||||
|              '../../codecs/isac/fix/source/lpc_masking_model_unittest.cc', | ||||
|              '../../codecs/isac/fix/source/transform_unittest.cc', | ||||
|           ], | ||||
|         }, # audio_coding_unittests | ||||
|       ], | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 kma@webrtc.org
					kma@webrtc.org