diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c index f2ad16459..c317b8f41 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c @@ -15,14 +15,21 @@ * */ -#include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h" - #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" #include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h" #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" +#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON) +/* Tables are defined in ARM assembly files. */ /* Cosine table 1 in Q14 */ -const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { +extern const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2]; +/* Sine table 1 in Q14 */ +extern const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2]; +/* Sine table 2 in Q14 */ +extern const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4]; +#else +/* Cosine table 1 in Q14 */ +static const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = { 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, @@ -41,17 +48,17 @@ const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { -6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006, -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803, -9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433, - -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, -12867, - -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, -13970, -14081, - -14189, -14295, -14399, -14500, -14598, -14694, -14788, -14879, -14968, -15053, - -15137, -15218, -15296, -15371, -15444, -15515, -15582, -15647, -15709, -15769, - -15826, -15880, -15931, -15980, -16026, -16069, -16110, -16147, -16182, -16214, - -16244, -16270, -16294, -16315, -16333, -16349, -16362, -16371, -16378, -16383 + -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, + -12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, + -13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788, + -14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515, + -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026, + -16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315, + -16333, -16349, -16362, -16371, -16378, -16383 }; - /* Sine table 1 in Q14 */ -const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { +static const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = { 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071, @@ -80,7 +87,7 @@ const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { /* Sine table 2 in Q14 */ -const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { +static const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = { 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178, @@ -94,6 +101,7 @@ const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { 4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245, 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107 }; +#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON // Declare function pointers. Spec2Time WebRtcIsacfix_Spec2Time; @@ -115,8 +123,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9, /* Multiply with complex exponentials and combine into one complex vector */ factQ19 = 16921; // 0.5/sqrt(240) in Q19 is round(.5/sqrt(240)*(2^19)) = 16921 for (k = 0; k < FRAMESAMPLES/2; k++) { - tmp1rQ14 = kCosTab1[k]; - tmp1iQ14 = kSinTab1[k]; + tmp1rQ14 = WebRtcIsacfix_kCosTab1[k]; + tmp1iQ14 = WebRtcIsacfix_kSinTab1[k]; xrQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre1Q9[k]) + WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre2Q9[k]), 7); xiQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre2Q9[k]) - WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre1Q9[k]), 7); tmpreQ16[k] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(factQ19, xrQ16)+4, 3); // (Q16*Q19>>16)>>3 = Q16 @@ -171,8 +179,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9, yiQ16 = -tmpreQ16[k] + tmpreQ16[FRAMESAMPLES/2 - 1 - k]; xiQ16 = tmpimQ16[k] - tmpimQ16[FRAMESAMPLES/2 - 1 - k]; yrQ16 = tmpimQ16[k] + tmpimQ16[FRAMESAMPLES/2 - 1 - k]; - tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k]; - tmp1iQ14 = kSinTab2[k]; + tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k]; + tmp1iQ14 = WebRtcIsacfix_kSinTab2[k]; v1Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xrQ16) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xiQ16); v2Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xrQ16) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xiQ16); outreQ7[k] = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(v1Q16, 9); @@ -198,8 +206,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR for (k = 0; k < FRAMESAMPLES/4; k++) { /* Move zero in time to beginning of frames */ - tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k]; - tmp1iQ14 = kSinTab2[k]; + tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k]; + tmp1iQ14 = WebRtcIsacfix_kSinTab2[k]; tmpInRe = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inreQ7[k], 9); // Q7 -> Q16 tmpInIm = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inimQ7[k], 9); // Q7 -> Q16 @@ -270,8 +278,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR /* Demodulate and separate */ factQ11 = 31727; // sqrt(240) in Q11 is round(15.49193338482967*2048) = 31727 for (k = 0; k < FRAMESAMPLES/2; k++) { - tmp1rQ14 = kCosTab1[k]; - tmp1iQ14 = kSinTab1[k]; + tmp1rQ14 = WebRtcIsacfix_kCosTab1[k]; + tmp1iQ14 = WebRtcIsacfix_kSinTab1[k]; xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre1Q16[k]) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre2Q16[k]); xiQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre2Q16[k]) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre1Q16[k]); xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16); diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S index 424d5413d..3f1ab791d 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S @@ -16,6 +16,9 @@ GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon GLOBAL_FUNCTION WebRtcIsacfix_Time2SpecNeon +GLOBAL_LABEL WebRtcIsacfix_kSinTab1 +GLOBAL_LABEL WebRtcIsacfix_kCosTab1 +GLOBAL_LABEL WebRtcIsacfix_kSinTab2 @ void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9, @ WebRtc_Word16* inre2Q9, @@ -39,9 +42,9 @@ DEFINE_FUNCTION WebRtcIsacfix_Time2SpecNeon add r4, sp, #16 @ tmpreQ16; add r5, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16; - adr r9, kCosTab1 - mov r6, #(kSinTab1 - kCosTab1) - add r10, r9, r6 @ kSinTab1 + adr r9, WebRtcIsacfix_kCosTab1 + mov r6, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1) + add r10, r9, r6 @ WebRtcIsacfix_kSinTab1 vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm. vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe. @@ -55,18 +58,18 @@ Time2Spec_TransformAndFindMax: subs r8, #8 - vld1.16 {q0}, [r9:64]! @ kCosTab1[] + vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[] vld1.16 {q2}, [r0]! @ inre1Q9[] - vmull.s16 q8, d0, d4 @ kCosTab1[k] * inre1Q9[k] - vld1.16 {q1}, [r10:64]! @ kSinTab1[] - vmull.s16 q9, d1, d5 @ kCosTab1[k] * inre1Q9[k] + vmull.s16 q8, d0, d4 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k] + vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[] + vmull.s16 q9, d1, d5 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k] vld1.16 {q3}, [r1]! @ inre2Q9[] - vmlal.s16 q8, d2, d6 @ kSinTab1[k] * inre2Q9[k] - vmlal.s16 q9, d3, d7 @ kSinTab1[k] * inre2Q9[k] - vmull.s16 q12, d0, d6 @ kCosTab1[k] * inre2Q9[k] - vmull.s16 q13, d1, d7 @ kCosTab1[k] * inre2Q9[k] - vmlsl.s16 q12, d2, d4 @ kSinTab1[k] * inre1Q9[k] - vmlsl.s16 q13, d3, d5 @ kSinTab1[k] * inre1Q9[k] + vmlal.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k] + vmlal.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k] + vmull.s16 q12, d0, d6 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k] + vmull.s16 q13, d1, d7 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k] + vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k] + vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k] vqdmulh.s32 q0, q8, q4 @ xrQ16 * factQ19 vqdmulh.s32 q1, q9, q4 @ xrQ16 * factQ19 @@ -153,9 +156,9 @@ Time2Spec_PreFftShift: ldr r7, [sp, #4] @ inre2Q9 add r4, r6, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 4] add r5, r7, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 4] - adr r10, kSinTab2 + adr r10, WebRtcIsacfix_kSinTab2 - add r9, r10, #(120*2 - 8) @ &kSinTab2[119 - 4] + add r9, r10, #(120*2 - 8) @ &WebRtcIsacfix_kSinTab2[119 - 4] mov r8, #(FRAMESAMPLES / 4) @ loop counter @@ -170,9 +173,9 @@ Time2Spec_PostFftTransform: @ By vshl, we effectively did "<< (-sh - 23)", instead of "<< (-sh)", @ ">> 14" and then ">> 9" as in the C code. - vld1.16 {d6}, [r9:64] @ kCosTab2[] + vld1.16 {d6}, [r9, :64] @ kCosTab2[] vneg.s16 d6, d6 - vld1.16 {d7}, [r10:64]! @ kSinTab2[] + vld1.16 {d7}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[] vrev64.16 q1, q1 @ Reverse samples in 2nd half of xrQ16[]. vqadd.s16 d4, d0, d2 @ xrQ16 vqsub.s16 d5, d1, d3 @ xiQ16 @@ -187,12 +190,12 @@ Time2Spec_PostFftTransform: vqsub.s16 d0, d2, d0 @ yiQ16 vmull.s16 q12, d6, d4 @ kCosTab2[k] * xrQ16 - vmlsl.s16 q12, d7, d5 @ kSinTab2[k] * xiQ16 - vmull.s16 q13, d7, d4 @ kSinTab2[k] * xrQ16 + vmlsl.s16 q12, d7, d5 @ WebRtcIsacfix_kSinTab2[k] * xiQ16 + vmull.s16 q13, d7, d4 @ WebRtcIsacfix_kSinTab2[k] * xrQ16 vmlal.s16 q13, d6, d5 @ kCosTab2[k] * xiQ16 - vmull.s16 q6, d7, d1 @ kSinTab2[k] * yrQ16 + vmull.s16 q6, d7, d1 @ WebRtcIsacfix_kSinTab2[k] * yrQ16 vmlal.s16 q6, d6, d0 @ kCosTab2[k] * yiQ16 - vmull.s16 q7, d7, d0 @ kSinTab2[k] * yiQ16 + vmull.s16 q7, d7, d0 @ WebRtcIsacfix_kSinTab2[k] * yiQ16 vmlsl.s16 q7, d6, d1 @ kCosTab2[k] * yrQ16 vshl.s32 q12, q12, q15 @@ -225,7 +228,7 @@ Time2Spec_PostFftTransform: .align 8 @ Cosine table 1 in Q14 -kCosTab1: +WebRtcIsacfix_kCosTab1: .short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315 .short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069 .short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647 @@ -259,7 +262,7 @@ kCosTab1: .align 8 @ Sine table 2 in Q14 -kSinTab2: +WebRtcIsacfix_kSinTab2: .short 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305 .short 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048 .short 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615 @@ -280,7 +283,7 @@ kSinTab2: .align 8 @ Sine table 1 in Q14 -kSinTab1: +WebRtcIsacfix_kSinTab1: .short 0, 214, 429, 643, 857, 1072, 1285, 1499 .short 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196 .short 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859 @@ -334,8 +337,8 @@ DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES / 2 - 8] mov r8, #(FRAMESAMPLES / 2) @ loop counter - adr r10, kSinTab2 - add r9, r10, #(120*2 - 16) @ &kSinTab2[119 - 8] + adr r10, WebRtcIsacfix_kSinTab2 + add r9, r10, #(120*2 - 16) @ &WebRtcIsacfix_kSinTab2[119 - 8] mov r5, #-32 @@ -349,24 +352,24 @@ TransformAndFindMax: subs r8, #16 - vld1.16 {q0}, [r9:64] @ kCosTab2[] + vld1.16 {q0}, [r9, :64] @ kCosTab2[] sub r9, #16 vld1.16 {q2}, [r0]! @ inreQ7[] vneg.s16 q0, q0 vld1.16 {q3}, [r1]! @ inimQ7[] vrev64.16 d0, d0 vrev64.16 d1, d1 - vld1.16 {q1}, [r10:64]! @ kSinTab2[] + vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[] vswp d0, d1 - vmull.s16 q8, d2, d6 @ kSinTab2[k] * inimQ7[k] - vmull.s16 q9, d3, d7 @ kSinTab2[k] * inimQ7[k] + vmull.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k] + vmull.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k] vmlal.s16 q8, d0, d4 @ kCosTab2[k] * inreQ7[k] vmlal.s16 q9, d1, d5 @ kCosTab2[k] * inreQ7[k] vmull.s16 q12, d0, d6 @ kCosTab2[k] * inimQ7[k] vmull.s16 q13, d1, d7 @ kCosTab2[k] * inimQ7[k] - vmlsl.s16 q12, d2, d4 @ kSinTab2[k] * inreQ7[k] - vmlsl.s16 q13, d3, d5 @ kSinTab2[k] * inreQ7[k] + vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k] + vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k] vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES / 2 - 8 + i] vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES / 2 - 8 + i] @@ -374,15 +377,15 @@ TransformAndFindMax: vrev64.16 q2, q2 @ Reverse the order of the samples vrev64.16 q3, q3 @ Reverse the order of the samples - vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[k] - vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[k] - vmlsl.s16 q14, d0, d7 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k] - vmlsl.s16 q15, d1, d6 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k] + vmull.s16 q14, d2, d5 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k] + vmull.s16 q15, d3, d4 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k] + vmlsl.s16 q14, d0, d7 @ q14 -= kCosTab2[k] * inreQ7[k] + vmlsl.s16 q15, d1, d6 @ q15 -= kCosTab2[k] * inreQ7[k] vmull.s16 q10, d0, d5 @ kCosTab2[k] * inimQ7[] vmull.s16 q11, d1, d4 @ kCosTab2[k] * inimQ7[] - vmlal.s16 q10, d2, d7 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[] - vmlal.s16 q11, d3, d6 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[] + vmlal.s16 q10, d2, d7 @ q10 += WebRtcIsacfix_kSinTab2[k] * inreQ7[] + vmlal.s16 q11, d3, d6 @ q11 += WebRtcIsacfix_kSinTab2[k] * inreQ7[] vshr.s32 q8, q8, #5 @ xrQ16 vshr.s32 q9, q9, #5 @ xrQ16 @@ -448,9 +451,9 @@ TransformAndFindMax: bgt TransformAndFindMax - adr r10, kSinTab1 - mov r2, #(kSinTab1 - kCosTab1) - sub r9, r10, r2 @ kCosTab1 + adr r10, WebRtcIsacfix_kSinTab1 + mov r2, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1) + sub r9, r10, r2 @ WebRtcIsacfix_kCosTab1 @ Find the maximum value in the Neon registers vmax.u32 d12, d13 @@ -565,39 +568,39 @@ PostFftShiftDivide: DemodulateAndSeparate: subs r8, #8 - vld1.16 {q0}, [r9:64]! @ kCosTab1[] - vmovl.s16 q6, d0 @ kCosTab1[] - vld1.16 {q1}, [r10:64]! @ kSinTab1[] - vmovl.s16 q7, d1 @ kCosTab1[] + vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[] + vmovl.s16 q6, d0 @ WebRtcIsacfix_kCosTab1[] + vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[] + vmovl.s16 q7, d1 @ WebRtcIsacfix_kCosTab1[] vld1.32 {q2, q3}, [r2] @ outre1Q16 - vmovl.s16 q8, d2 @ kSinTab1[] + vmovl.s16 q8, d2 @ WebRtcIsacfix_kSinTab1[] vld1.32 {q4, q5}, [r3] @ outre2Q16 - vmovl.s16 q9, d3 @ kSinTab1[] + vmovl.s16 q9, d3 @ WebRtcIsacfix_kSinTab1[] - vmull.s32 q10, d12, d4 @ kCosTab1[k] * outre1Q16[k] - vmull.s32 q11, d13, d5 @ kCosTab1[k] * outre1Q16[k] - vmull.s32 q12, d14, d6 @ kCosTab1[k] * outre1Q16[k] - vmull.s32 q13, d15, d7 @ kCosTab1[k] * outre1Q16[k] + vmull.s32 q10, d12, d4 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k] + vmull.s32 q11, d13, d5 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k] + vmull.s32 q12, d14, d6 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k] + vmull.s32 q13, d15, d7 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k] - vmlsl.s32 q10, d16, d8 @ += kSinTab1[k] * outre2Q16[k] - vmlsl.s32 q11, d17, d9 @ += kSinTab1[k] * outre2Q16[k] - vmlsl.s32 q12, d18, d10 @ += kSinTab1[k] * outre2Q16[k] - vmlsl.s32 q13, d19, d11 @ += kSinTab1[k] * outre2Q16[k] + vmlsl.s32 q10, d16, d8 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k] + vmlsl.s32 q11, d17, d9 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k] + vmlsl.s32 q12, d18, d10 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k] + vmlsl.s32 q13, d19, d11 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k] vrshrn.s64 d20, q10, #10 @ xrQ16 vrshrn.s64 d21, q11, #10 @ xrQ16 vrshrn.s64 d22, q12, #10 @ xrQ16 vrshrn.s64 d23, q13, #10 @ xrQ16 - vmull.s32 q12, d12, d8 @ kCosTab1[k] * outre2Q16[k] - vmull.s32 q13, d13, d9 @ kCosTab1[k] * outre2Q16[k] - vmull.s32 q14, d14, d10 @ kCosTab1[k] * outre2Q16[k] - vmull.s32 q15, d15, d11 @ kCosTab1[k] * outre2Q16[k] + vmull.s32 q12, d12, d8 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k] + vmull.s32 q13, d13, d9 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k] + vmull.s32 q14, d14, d10 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k] + vmull.s32 q15, d15, d11 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k] - vmlal.s32 q12, d16, d4 @ += kSinTab1[k] * outre1Q16[k] - vmlal.s32 q13, d17, d5 @ += kSinTab1[k] * outre1Q16[k] - vmlal.s32 q14, d18, d6 @ += kSinTab1[k] * outre1Q16[k] - vmlal.s32 q15, d19, d7 @ += kSinTab1[k] * outre1Q16[k] + vmlal.s32 q12, d16, d4 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k] + vmlal.s32 q13, d17, d5 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k] + vmlal.s32 q14, d18, d6 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k] + vmlal.s32 q15, d19, d7 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k] vdup.s32 q4, r0 @ generic -> Neon doesn't cost extra cycles. @@ -622,4 +625,3 @@ DemodulateAndSeparate: add sp, sp, #16 vpop {q4-q7} pop {r4-r11,pc} - diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c index 13ab85307..28eb4f232 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core.c +++ b/webrtc/modules/audio_processing/aecm/aecm_core.c @@ -27,31 +27,21 @@ FILE *dfile; FILE *testfile; #endif -#ifdef AECM_SHORT - -// Square root of Hanning window in Q14 -const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] = -{ - 0, 804, 1606, 2404, 3196, 3981, 4756, 5520, - 6270, 7005, 7723, 8423, 9102, 9760, 10394, 11003, - 11585, 12140, 12665, 13160, 13623, 14053, 14449, 14811, - 15137, 15426, 15679, 15893, 16069, 16207, 16305, 16364, - 16384 -}; - +// Square root of Hanning window in Q14. +#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON) +// Table is defined in an ARM assembly file. +extern const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END; #else - -// Square root of Hanning window in Q14 -const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = -{ - 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, - 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, 8364, - 8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, 11795, 12068, 12335, - 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918, - 15079, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237, - 16286, 16325, 16354, 16373, 16384 +static const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 }; - #endif //Q15 alpha = 0.99439986968132 const Factor for magnitude approximation diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.h b/webrtc/modules/audio_processing/aecm/aecm_core.h index a0fac5dd1..3e0816fba 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core.h +++ b/webrtc/modules/audio_processing/aecm/aecm_core.h @@ -25,8 +25,6 @@ #define ALIGN8_END __attribute__((aligned(8))) #endif -extern const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END; - typedef struct { WebRtc_Word16 real; WebRtc_Word16 imag; diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_neon.S b/webrtc/modules/audio_processing/aecm/aecm_core_neon.S index 833575ea0..65db08e3f 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core_neon.S +++ b/webrtc/modules/audio_processing/aecm/aecm_core_neon.S @@ -16,8 +16,7 @@ #include "aecm_core_neon_offsets.h" #include "webrtc/system_wrappers/interface/asm_defines.h" -.extern WebRtcAecm_kSqrtHanning - +GLOBAL_LABEL WebRtcAecm_kSqrtHanning GLOBAL_FUNCTION WebRtcAecm_WindowAndFFTNeon GLOBAL_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon @@ -38,7 +37,7 @@ DEFINE_FUNCTION WebRtcAecm_WindowAndFFTNeon vmov.i16 d21, #0 @ For imaginary parts of |fft|. vmov.i16 d27, #0 @ For imaginary parts of |fft|. - ldr r5, =WebRtcAecm_kSqrtHanning + adr r5, WebRtcAecm_kSqrtHanning adr lr, kSqrtHanningReversed add r4, r1, #(PART_LEN2 * 2) @ &fft[PART_LEN2] add r12, r2, #(PART_LEN * 2) @ time_signal[PART_LEN] @@ -133,7 +132,7 @@ LOOP_PRE_IFFT: ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0] adr r12, kSqrtHanningReversed - ldr r6, =WebRtcAecm_kSqrtHanning + adr r6, WebRtcAecm_kSqrtHanning rsb r0, r2, r0 @ outCFFT - aecm->dfaCleanQDomain vdup.32 q9, r0 add r0, r4, #(PART_LEN * 4) @ &efw[PART_LEN] @@ -321,8 +320,21 @@ LOOP_RESET_ADAPTIVE_CHANNEL: bx lr - @ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning, - @ the order was reversed and one useless element (0) was removed. +@ Square root of Hanning window in Q14. +.align 3 +WebRtcAecm_kSqrtHanning: + .short 0 + .short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172 + .short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224 + .short 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040 + .short 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514 + .short 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553 + .short 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079 + .short 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034 + .short 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 + +@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning, +@ the order was reversed and one element (0) was removed. .align 3 kSqrtHanningReversed: .short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947 diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_neon.c b/webrtc/modules/audio_processing/aecm/aecm_core_neon.c index d8250ef08..cf30cee99 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core_neon.c +++ b/webrtc/modules/audio_processing/aecm/aecm_core_neon.c @@ -19,23 +19,28 @@ // generating script and makefile, to replace these C functions. // Square root of Hanning window in Q14. -static const WebRtc_Word16 kSqrtHanningReversed[] ALIGN8_END = { - 16384, 16373, 16354, 16325, - 16286, 16237, 16179, 16111, - 16034, 15947, 15851, 15746, - 15631, 15506, 15373, 15231, - 15079, 14918, 14749, 14571, - 14384, 14189, 13985, 13773, - 13553, 13325, 13089, 12845, - 12594, 12335, 12068, 11795, - 11514, 11227, 10933, 10633, - 10326, 10013, 9695, 9370, - 9040, 8705, 8364, 8019, - 7668, 7313, 6954, 6591, - 6224, 5853, 5478, 5101, - 4720, 4337, 3951, 3562, - 3172, 2780, 2386, 1990, - 1594, 1196, 798, 399 +const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, + 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; + +// Square root of Hanning window in Q14, in reversed order. +static const ALIGN8_BEG WebRtc_Word16 kSqrtHanningReversed[] ALIGN8_END = { + 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, + 16034, 15947, 15851, 15746, 15631, 15506, 15373, 15231, + 15079, 14918, 14749, 14571, 14384, 14189, 13985, 13773, + 13553, 13325, 13089, 12845, 12594, 12335, 12068, 11795, + 11514, 11227, 10933, 10633, 10326, 10013, 9695, 9370, + 9040, 8705, 8364, 8019, 7668, 7313, 6954, 6591, + 6224, 5853, 5478, 5101, 4720, 4337, 3951, 3562, + 3172, 2780, 2386, 1990, 1594, 1196, 798, 399 }; void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm, diff --git a/webrtc/modules/audio_processing/aecm/aecm_defines.h b/webrtc/modules/audio_processing/aecm/aecm_defines.h index 437cbf255..6d63990b9 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_defines.h +++ b/webrtc/modules/audio_processing/aecm/aecm_defines.h @@ -13,18 +13,11 @@ #define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */ -/* #define AECM_SHORT For 32 sample partition length. */ - /* Algorithm parameters */ #define FRAME_LEN 80 /* Total frame length, 10 ms. */ -#ifdef AECM_SHORT -#define PART_LEN 32 /* Length of partition. */ -#define PART_LEN_SHIFT 6 /* Length of (PART_LEN * 2) in base 2. */ -#else #define PART_LEN 64 /* Length of partition. */ #define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */ -#endif #define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */ #define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */ @@ -33,11 +26,7 @@ #define MAX_DELAY 100 /* Counter parameters */ -#ifdef AECM_SHORT -#define CONV_LEN 1024 /* Convergence length used at startup. */ -#else #define CONV_LEN 512 /* Convergence length used at startup. */ -#endif #define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */ /* Energy parameters */ diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c index 289407097..a605c1ed5 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.c +++ b/webrtc/modules/audio_processing/ns/nsx_core.c @@ -20,48 +20,56 @@ #include "cpu_features_wrapper.h" #include "nsx_core.h" -// Skip first frequency bins during estimation. (0 <= value < 64) -static const int kStartBand = 5; - -// Constants to compensate for shifting signal log(2^shifts). -const WebRtc_Word16 WebRtcNsx_kLogTable[9] = { +#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON) +/* Tables are defined in ARM assembly files. */ +extern const WebRtc_Word16 WebRtcNsx_kLogTable[9]; +extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201]; +extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256]; +#else +static const WebRtc_Word16 WebRtcNsx_kLogTable[9] = { 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 }; -const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = { - 32767, 16384, 10923, 8192, 6554, 5461, 4681, - 4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, - 1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, - 886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, - 607, 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, 468, - 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, - 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315, - 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278, 275, 273, 271, - 269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248, 246, 245, 243, 241, 239, 237, - 236, 234, 232, 231, 229, 228, 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, - 210, 209, 207, 206, 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, - 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, +static const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 }; -const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = { +static const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = { 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117, - 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, - 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, - 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, - 193, 194, 195, 196, 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, - 207, 208, 208, 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, - 220, 220, 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231, - 232, 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, - 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 }; +#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON + +// Skip first frequency bins during estimation. (0 <= value < 64) +static const int kStartBand = 5; static const WebRtc_Word16 kPowTableFrac[1024] = { 0, 1, 1, 2, 3, 3, 4, 5, diff --git a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h index 618193b28..e5b3919af 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.h +++ b/webrtc/modules/audio_processing/ns/nsx_core.h @@ -227,10 +227,6 @@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor); void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff); #endif -extern const WebRtc_Word16 WebRtcNsx_kLogTable[9]; -extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256]; -extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201]; - #ifdef __cplusplus } #endif diff --git a/webrtc/modules/audio_processing/ns/nsx_core_neon.S b/webrtc/modules/audio_processing/ns/nsx_core_neon.S index 1eb46293c..ccea6ad06 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core_neon.S +++ b/webrtc/modules/audio_processing/ns/nsx_core_neon.S @@ -22,6 +22,48 @@ GLOBAL_FUNCTION WebRtcNsx_SynthesisUpdateNeon GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon GLOBAL_FUNCTION WebRtcNsx_DenormalizeNeon GLOBAL_FUNCTION WebRtcNsx_CreateComplexBufferNeon +GLOBAL_LABEL WebRtcNsx_kLogTable +GLOBAL_LABEL WebRtcNsx_kCounterDiv +GLOBAL_LABEL WebRtcNsx_kLogTableFrac + +WebRtcNsx_kLogTableFrac: +.short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26 +.short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50 +.short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72 +.short 73, 74, 75, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93 +.short 94, 95, 96, 97, 98, 99, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110 +.short 111, 112, 113, 114, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 +.short 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141 +.short 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 155 +.short 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 169 +.short 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, 179, 180, 181, 182, 183 +.short 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, 195, 196 +.short 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, 207, 208, 208 +.short 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, 220, 220 +.short 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231, 232 +.short 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244 +.short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255 +.short 255 + +WebRtcNsx_kCounterDiv: +.short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979 +.short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489 +.short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964 +.short 936, 910, 886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683 +.short 669, 655, 643, 630, 618, 607, 596, 585, 575, 565, 555, 546, 537, 529 +.short 520, 512, 504, 496, 489, 482, 475, 468, 462, 455, 449, 443, 437, 431 +.short 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, 372, 368, 364 +.short 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315 +.short 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278 +.short 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248 +.short 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, 226, 224 +.short 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, 205 +.short 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188 +.short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174 +.short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 + +WebRtcNsx_kLogTable: +.short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 @ void NoiseEstimationNeon(NsxInst_t* inst, @ uint16_t* magn, @@ -56,7 +98,7 @@ DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon movw r4, #offset_nsx_normData ldr r2, [r0, #offset_nsx_stages] @ inst->stages ldr r4, [r0, r4] @ inst->normData - ldr r12, =WebRtcNsx_kLogTable + adr r12, WebRtcNsx_kLogTable subs r3, r2, r4 @ tabind = inst->stages - inst->normData; ldr r5, [r0, #offset_nsx_magnLen] @ magnLen rsblt r3, #0 @@ -67,7 +109,7 @@ DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon str r3, [sp] vdup.16 q15, r3 - ldr r9, =WebRtcNsx_kLogTableFrac + adr r9, WebRtcNsx_kLogTableFrac LOOP_SET_LMAGN: ldrh r2, [r1], #2 @ magn[i] @@ -114,7 +156,7 @@ CHECK_LMAGN_COUNTER: LOOP_SIMULT: ldrsh r1, [r10] @ inst->noiseEstCounter[s] - ldr r3, =WebRtcNsx_kCounterDiv + adr r3, WebRtcNsx_kCounterDiv mov r11, r1, lsl #1 @ counter ldrh r11, [r3, r11] @ countDiv = WebRtcNsx_kCounterDiv[counter]; sub r12, r6, #1 @ Loop counter. diff --git a/webrtc/modules/audio_processing/ns/nsx_core_neon.c b/webrtc/modules/audio_processing/ns/nsx_core_neon.c index 8fc74e72d..796b3eae9 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core_neon.c +++ b/webrtc/modules/audio_processing/ns/nsx_core_neon.c @@ -13,6 +13,48 @@ #include #include +// Constants to compensate for shifting signal log(2^shifts). +const WebRtc_Word16 WebRtcNsx_kLogTable[9] = { + 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 +}; + +const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 +}; + +const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; + // Update the noise estimation information. static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) { const int16_t kExp2Const = 11819; // Q13 diff --git a/webrtc/system_wrappers/interface/asm_defines.h b/webrtc/system_wrappers/interface/asm_defines.h index 2be5a632c..49a12e161 100644 --- a/webrtc/system_wrappers/interface/asm_defines.h +++ b/webrtc/system_wrappers/interface/asm_defines.h @@ -27,6 +27,9 @@ _\name: .macro CALL_FUNCTION name bl _\name .endm +.macro GLOBAL_LABEL name +.global _\name +.endm #else .macro GLOBAL_FUNCTION name .global \name @@ -37,6 +40,9 @@ bl _\name .macro CALL_FUNCTION name bl \name .endm +.macro GLOBAL_LABEL name +.global \name +.endm #endif .text