Fixed text relocation code related to ARM assembly code.

Refer to WebRTC issue 1300.
Review URL: https://webrtc-codereview.appspot.com/1055004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3409 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org
2013-01-25 03:18:05 +00:00
parent e8482f0e9f
commit 16d540eff1
12 changed files with 278 additions and 180 deletions

View File

@@ -15,14 +15,21 @@
* *
*/ */
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h" #include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
/* Tables are defined in ARM assembly files. */
/* Cosine table 1 in Q14 */ /* Cosine table 1 in Q14 */
const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { extern const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2];
/* Sine table 1 in Q14 */
extern const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2];
/* Sine table 2 in Q14 */
extern const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4];
#else
/* Cosine table 1 in Q14 */
static const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = {
16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
@@ -41,17 +48,17 @@ const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
-6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006, -6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006,
-8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803, -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803,
-9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433, -9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433,
-11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, -12867, -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733,
-12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, -13970, -14081, -12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856,
-14189, -14295, -14399, -14500, -14598, -14694, -14788, -14879, -14968, -15053, -13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788,
-15137, -15218, -15296, -15371, -15444, -15515, -15582, -15647, -15709, -15769, -14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515,
-15826, -15880, -15931, -15980, -16026, -16069, -16110, -16147, -16182, -16214, -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026,
-16244, -16270, -16294, -16315, -16333, -16349, -16362, -16371, -16378, -16383 -16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315,
-16333, -16349, -16362, -16371, -16378, -16383
}; };
/* Sine table 1 in Q14 */ /* Sine table 1 in Q14 */
const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { static const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = {
0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926, 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926,
2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033, 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033,
4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071, 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071,
@@ -80,7 +87,7 @@ const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
/* Sine table 2 in Q14 */ /* Sine table 2 in Q14 */
const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { static const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = {
16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257, 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853, 16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853,
15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178, 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178,
@@ -94,6 +101,7 @@ const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245, 4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245,
2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
}; };
#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
// Declare function pointers. // Declare function pointers.
Spec2Time WebRtcIsacfix_Spec2Time; Spec2Time WebRtcIsacfix_Spec2Time;
@@ -115,8 +123,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
/* Multiply with complex exponentials and combine into one complex vector */ /* Multiply with complex exponentials and combine into one complex vector */
factQ19 = 16921; // 0.5/sqrt(240) in Q19 is round(.5/sqrt(240)*(2^19)) = 16921 factQ19 = 16921; // 0.5/sqrt(240) in Q19 is round(.5/sqrt(240)*(2^19)) = 16921
for (k = 0; k < FRAMESAMPLES/2; k++) { for (k = 0; k < FRAMESAMPLES/2; k++) {
tmp1rQ14 = kCosTab1[k]; tmp1rQ14 = WebRtcIsacfix_kCosTab1[k];
tmp1iQ14 = kSinTab1[k]; tmp1iQ14 = WebRtcIsacfix_kSinTab1[k];
xrQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre1Q9[k]) + WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre2Q9[k]), 7); xrQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre1Q9[k]) + WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre2Q9[k]), 7);
xiQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre2Q9[k]) - WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre1Q9[k]), 7); xiQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre2Q9[k]) - WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre1Q9[k]), 7);
tmpreQ16[k] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(factQ19, xrQ16)+4, 3); // (Q16*Q19>>16)>>3 = Q16 tmpreQ16[k] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(factQ19, xrQ16)+4, 3); // (Q16*Q19>>16)>>3 = Q16
@@ -171,8 +179,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
yiQ16 = -tmpreQ16[k] + tmpreQ16[FRAMESAMPLES/2 - 1 - k]; yiQ16 = -tmpreQ16[k] + tmpreQ16[FRAMESAMPLES/2 - 1 - k];
xiQ16 = tmpimQ16[k] - tmpimQ16[FRAMESAMPLES/2 - 1 - k]; xiQ16 = tmpimQ16[k] - tmpimQ16[FRAMESAMPLES/2 - 1 - k];
yrQ16 = tmpimQ16[k] + tmpimQ16[FRAMESAMPLES/2 - 1 - k]; yrQ16 = tmpimQ16[k] + tmpimQ16[FRAMESAMPLES/2 - 1 - k];
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k]; tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k];
tmp1iQ14 = kSinTab2[k]; tmp1iQ14 = WebRtcIsacfix_kSinTab2[k];
v1Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xrQ16) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xiQ16); v1Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xrQ16) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xiQ16);
v2Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xrQ16) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xiQ16); v2Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xrQ16) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xiQ16);
outreQ7[k] = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(v1Q16, 9); outreQ7[k] = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(v1Q16, 9);
@@ -198,8 +206,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
for (k = 0; k < FRAMESAMPLES/4; k++) { for (k = 0; k < FRAMESAMPLES/4; k++) {
/* Move zero in time to beginning of frames */ /* Move zero in time to beginning of frames */
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k]; tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k];
tmp1iQ14 = kSinTab2[k]; tmp1iQ14 = WebRtcIsacfix_kSinTab2[k];
tmpInRe = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inreQ7[k], 9); // Q7 -> Q16 tmpInRe = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inreQ7[k], 9); // Q7 -> Q16
tmpInIm = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inimQ7[k], 9); // Q7 -> Q16 tmpInIm = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inimQ7[k], 9); // Q7 -> Q16
@@ -270,8 +278,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
/* Demodulate and separate */ /* Demodulate and separate */
factQ11 = 31727; // sqrt(240) in Q11 is round(15.49193338482967*2048) = 31727 factQ11 = 31727; // sqrt(240) in Q11 is round(15.49193338482967*2048) = 31727
for (k = 0; k < FRAMESAMPLES/2; k++) { for (k = 0; k < FRAMESAMPLES/2; k++) {
tmp1rQ14 = kCosTab1[k]; tmp1rQ14 = WebRtcIsacfix_kCosTab1[k];
tmp1iQ14 = kSinTab1[k]; tmp1iQ14 = WebRtcIsacfix_kSinTab1[k];
xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre1Q16[k]) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre2Q16[k]); xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre1Q16[k]) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre2Q16[k]);
xiQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre2Q16[k]) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre1Q16[k]); xiQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre2Q16[k]) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre1Q16[k]);
xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16); xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16);

View File

@@ -16,6 +16,9 @@
GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon
GLOBAL_FUNCTION WebRtcIsacfix_Time2SpecNeon GLOBAL_FUNCTION WebRtcIsacfix_Time2SpecNeon
GLOBAL_LABEL WebRtcIsacfix_kSinTab1
GLOBAL_LABEL WebRtcIsacfix_kCosTab1
GLOBAL_LABEL WebRtcIsacfix_kSinTab2
@ void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9, @ void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9,
@ WebRtc_Word16* inre2Q9, @ WebRtc_Word16* inre2Q9,
@@ -39,9 +42,9 @@ DEFINE_FUNCTION WebRtcIsacfix_Time2SpecNeon
add r4, sp, #16 @ tmpreQ16; add r4, sp, #16 @ tmpreQ16;
add r5, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16; add r5, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16;
adr r9, kCosTab1 adr r9, WebRtcIsacfix_kCosTab1
mov r6, #(kSinTab1 - kCosTab1) mov r6, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1)
add r10, r9, r6 @ kSinTab1 add r10, r9, r6 @ WebRtcIsacfix_kSinTab1
vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm. vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm.
vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe. vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe.
@@ -55,18 +58,18 @@ Time2Spec_TransformAndFindMax:
subs r8, #8 subs r8, #8
vld1.16 {q0}, [r9:64]! @ kCosTab1[] vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[]
vld1.16 {q2}, [r0]! @ inre1Q9[] vld1.16 {q2}, [r0]! @ inre1Q9[]
vmull.s16 q8, d0, d4 @ kCosTab1[k] * inre1Q9[k] vmull.s16 q8, d0, d4 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k]
vld1.16 {q1}, [r10:64]! @ kSinTab1[] vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[]
vmull.s16 q9, d1, d5 @ kCosTab1[k] * inre1Q9[k] vmull.s16 q9, d1, d5 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k]
vld1.16 {q3}, [r1]! @ inre2Q9[] vld1.16 {q3}, [r1]! @ inre2Q9[]
vmlal.s16 q8, d2, d6 @ kSinTab1[k] * inre2Q9[k] vmlal.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k]
vmlal.s16 q9, d3, d7 @ kSinTab1[k] * inre2Q9[k] vmlal.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k]
vmull.s16 q12, d0, d6 @ kCosTab1[k] * inre2Q9[k] vmull.s16 q12, d0, d6 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k]
vmull.s16 q13, d1, d7 @ kCosTab1[k] * inre2Q9[k] vmull.s16 q13, d1, d7 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k]
vmlsl.s16 q12, d2, d4 @ kSinTab1[k] * inre1Q9[k] vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k]
vmlsl.s16 q13, d3, d5 @ kSinTab1[k] * inre1Q9[k] vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k]
vqdmulh.s32 q0, q8, q4 @ xrQ16 * factQ19 vqdmulh.s32 q0, q8, q4 @ xrQ16 * factQ19
vqdmulh.s32 q1, q9, q4 @ xrQ16 * factQ19 vqdmulh.s32 q1, q9, q4 @ xrQ16 * factQ19
@@ -153,9 +156,9 @@ Time2Spec_PreFftShift:
ldr r7, [sp, #4] @ inre2Q9 ldr r7, [sp, #4] @ inre2Q9
add r4, r6, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 4] add r4, r6, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 4]
add r5, r7, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 4] add r5, r7, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 4]
adr r10, kSinTab2 adr r10, WebRtcIsacfix_kSinTab2
add r9, r10, #(120*2 - 8) @ &kSinTab2[119 - 4] add r9, r10, #(120*2 - 8) @ &WebRtcIsacfix_kSinTab2[119 - 4]
mov r8, #(FRAMESAMPLES / 4) @ loop counter mov r8, #(FRAMESAMPLES / 4) @ loop counter
@@ -170,9 +173,9 @@ Time2Spec_PostFftTransform:
@ By vshl, we effectively did "<< (-sh - 23)", instead of "<< (-sh)", @ By vshl, we effectively did "<< (-sh - 23)", instead of "<< (-sh)",
@ ">> 14" and then ">> 9" as in the C code. @ ">> 14" and then ">> 9" as in the C code.
vld1.16 {d6}, [r9:64] @ kCosTab2[] vld1.16 {d6}, [r9, :64] @ kCosTab2[]
vneg.s16 d6, d6 vneg.s16 d6, d6
vld1.16 {d7}, [r10:64]! @ kSinTab2[] vld1.16 {d7}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[]
vrev64.16 q1, q1 @ Reverse samples in 2nd half of xrQ16[]. vrev64.16 q1, q1 @ Reverse samples in 2nd half of xrQ16[].
vqadd.s16 d4, d0, d2 @ xrQ16 vqadd.s16 d4, d0, d2 @ xrQ16
vqsub.s16 d5, d1, d3 @ xiQ16 vqsub.s16 d5, d1, d3 @ xiQ16
@@ -187,12 +190,12 @@ Time2Spec_PostFftTransform:
vqsub.s16 d0, d2, d0 @ yiQ16 vqsub.s16 d0, d2, d0 @ yiQ16
vmull.s16 q12, d6, d4 @ kCosTab2[k] * xrQ16 vmull.s16 q12, d6, d4 @ kCosTab2[k] * xrQ16
vmlsl.s16 q12, d7, d5 @ kSinTab2[k] * xiQ16 vmlsl.s16 q12, d7, d5 @ WebRtcIsacfix_kSinTab2[k] * xiQ16
vmull.s16 q13, d7, d4 @ kSinTab2[k] * xrQ16 vmull.s16 q13, d7, d4 @ WebRtcIsacfix_kSinTab2[k] * xrQ16
vmlal.s16 q13, d6, d5 @ kCosTab2[k] * xiQ16 vmlal.s16 q13, d6, d5 @ kCosTab2[k] * xiQ16
vmull.s16 q6, d7, d1 @ kSinTab2[k] * yrQ16 vmull.s16 q6, d7, d1 @ WebRtcIsacfix_kSinTab2[k] * yrQ16
vmlal.s16 q6, d6, d0 @ kCosTab2[k] * yiQ16 vmlal.s16 q6, d6, d0 @ kCosTab2[k] * yiQ16
vmull.s16 q7, d7, d0 @ kSinTab2[k] * yiQ16 vmull.s16 q7, d7, d0 @ WebRtcIsacfix_kSinTab2[k] * yiQ16
vmlsl.s16 q7, d6, d1 @ kCosTab2[k] * yrQ16 vmlsl.s16 q7, d6, d1 @ kCosTab2[k] * yrQ16
vshl.s32 q12, q12, q15 vshl.s32 q12, q12, q15
@@ -225,7 +228,7 @@ Time2Spec_PostFftTransform:
.align 8 .align 8
@ Cosine table 1 in Q14 @ Cosine table 1 in Q14
kCosTab1: WebRtcIsacfix_kCosTab1:
.short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315 .short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315
.short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069 .short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069
.short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647 .short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647
@@ -259,7 +262,7 @@ kCosTab1:
.align 8 .align 8
@ Sine table 2 in Q14 @ Sine table 2 in Q14
kSinTab2: WebRtcIsacfix_kSinTab2:
.short 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305 .short 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305
.short 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048 .short 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048
.short 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615 .short 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615
@@ -280,7 +283,7 @@ kSinTab2:
.align 8 .align 8
@ Sine table 1 in Q14 @ Sine table 1 in Q14
kSinTab1: WebRtcIsacfix_kSinTab1:
.short 0, 214, 429, 643, 857, 1072, 1285, 1499 .short 0, 214, 429, 643, 857, 1072, 1285, 1499
.short 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196 .short 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196
.short 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859 .short 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859
@@ -334,8 +337,8 @@ DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon
add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES / 2 - 8] add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES / 2 - 8]
mov r8, #(FRAMESAMPLES / 2) @ loop counter mov r8, #(FRAMESAMPLES / 2) @ loop counter
adr r10, kSinTab2 adr r10, WebRtcIsacfix_kSinTab2
add r9, r10, #(120*2 - 16) @ &kSinTab2[119 - 8] add r9, r10, #(120*2 - 16) @ &WebRtcIsacfix_kSinTab2[119 - 8]
mov r5, #-32 mov r5, #-32
@@ -349,24 +352,24 @@ TransformAndFindMax:
subs r8, #16 subs r8, #16
vld1.16 {q0}, [r9:64] @ kCosTab2[] vld1.16 {q0}, [r9, :64] @ kCosTab2[]
sub r9, #16 sub r9, #16
vld1.16 {q2}, [r0]! @ inreQ7[] vld1.16 {q2}, [r0]! @ inreQ7[]
vneg.s16 q0, q0 vneg.s16 q0, q0
vld1.16 {q3}, [r1]! @ inimQ7[] vld1.16 {q3}, [r1]! @ inimQ7[]
vrev64.16 d0, d0 vrev64.16 d0, d0
vrev64.16 d1, d1 vrev64.16 d1, d1
vld1.16 {q1}, [r10:64]! @ kSinTab2[] vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[]
vswp d0, d1 vswp d0, d1
vmull.s16 q8, d2, d6 @ kSinTab2[k] * inimQ7[k] vmull.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmull.s16 q9, d3, d7 @ kSinTab2[k] * inimQ7[k] vmull.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmlal.s16 q8, d0, d4 @ kCosTab2[k] * inreQ7[k] vmlal.s16 q8, d0, d4 @ kCosTab2[k] * inreQ7[k]
vmlal.s16 q9, d1, d5 @ kCosTab2[k] * inreQ7[k] vmlal.s16 q9, d1, d5 @ kCosTab2[k] * inreQ7[k]
vmull.s16 q12, d0, d6 @ kCosTab2[k] * inimQ7[k] vmull.s16 q12, d0, d6 @ kCosTab2[k] * inimQ7[k]
vmull.s16 q13, d1, d7 @ kCosTab2[k] * inimQ7[k] vmull.s16 q13, d1, d7 @ kCosTab2[k] * inimQ7[k]
vmlsl.s16 q12, d2, d4 @ kSinTab2[k] * inreQ7[k] vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k]
vmlsl.s16 q13, d3, d5 @ kSinTab2[k] * inreQ7[k] vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k]
vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES / 2 - 8 + i] vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES / 2 - 8 + i]
vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES / 2 - 8 + i] vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES / 2 - 8 + i]
@@ -374,15 +377,15 @@ TransformAndFindMax:
vrev64.16 q2, q2 @ Reverse the order of the samples vrev64.16 q2, q2 @ Reverse the order of the samples
vrev64.16 q3, q3 @ Reverse the order of the samples vrev64.16 q3, q3 @ Reverse the order of the samples
vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[k] vmull.s16 q14, d2, d5 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[k] vmull.s16 q15, d3, d4 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmlsl.s16 q14, d0, d7 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k] vmlsl.s16 q14, d0, d7 @ q14 -= kCosTab2[k] * inreQ7[k]
vmlsl.s16 q15, d1, d6 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k] vmlsl.s16 q15, d1, d6 @ q15 -= kCosTab2[k] * inreQ7[k]
vmull.s16 q10, d0, d5 @ kCosTab2[k] * inimQ7[] vmull.s16 q10, d0, d5 @ kCosTab2[k] * inimQ7[]
vmull.s16 q11, d1, d4 @ kCosTab2[k] * inimQ7[] vmull.s16 q11, d1, d4 @ kCosTab2[k] * inimQ7[]
vmlal.s16 q10, d2, d7 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[] vmlal.s16 q10, d2, d7 @ q10 += WebRtcIsacfix_kSinTab2[k] * inreQ7[]
vmlal.s16 q11, d3, d6 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[] vmlal.s16 q11, d3, d6 @ q11 += WebRtcIsacfix_kSinTab2[k] * inreQ7[]
vshr.s32 q8, q8, #5 @ xrQ16 vshr.s32 q8, q8, #5 @ xrQ16
vshr.s32 q9, q9, #5 @ xrQ16 vshr.s32 q9, q9, #5 @ xrQ16
@@ -448,9 +451,9 @@ TransformAndFindMax:
bgt TransformAndFindMax bgt TransformAndFindMax
adr r10, kSinTab1 adr r10, WebRtcIsacfix_kSinTab1
mov r2, #(kSinTab1 - kCosTab1) mov r2, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1)
sub r9, r10, r2 @ kCosTab1 sub r9, r10, r2 @ WebRtcIsacfix_kCosTab1
@ Find the maximum value in the Neon registers @ Find the maximum value in the Neon registers
vmax.u32 d12, d13 vmax.u32 d12, d13
@@ -565,39 +568,39 @@ PostFftShiftDivide:
DemodulateAndSeparate: DemodulateAndSeparate:
subs r8, #8 subs r8, #8
vld1.16 {q0}, [r9:64]! @ kCosTab1[] vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[]
vmovl.s16 q6, d0 @ kCosTab1[] vmovl.s16 q6, d0 @ WebRtcIsacfix_kCosTab1[]
vld1.16 {q1}, [r10:64]! @ kSinTab1[] vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[]
vmovl.s16 q7, d1 @ kCosTab1[] vmovl.s16 q7, d1 @ WebRtcIsacfix_kCosTab1[]
vld1.32 {q2, q3}, [r2] @ outre1Q16 vld1.32 {q2, q3}, [r2] @ outre1Q16
vmovl.s16 q8, d2 @ kSinTab1[] vmovl.s16 q8, d2 @ WebRtcIsacfix_kSinTab1[]
vld1.32 {q4, q5}, [r3] @ outre2Q16 vld1.32 {q4, q5}, [r3] @ outre2Q16
vmovl.s16 q9, d3 @ kSinTab1[] vmovl.s16 q9, d3 @ WebRtcIsacfix_kSinTab1[]
vmull.s32 q10, d12, d4 @ kCosTab1[k] * outre1Q16[k] vmull.s32 q10, d12, d4 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmull.s32 q11, d13, d5 @ kCosTab1[k] * outre1Q16[k] vmull.s32 q11, d13, d5 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmull.s32 q12, d14, d6 @ kCosTab1[k] * outre1Q16[k] vmull.s32 q12, d14, d6 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmull.s32 q13, d15, d7 @ kCosTab1[k] * outre1Q16[k] vmull.s32 q13, d15, d7 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmlsl.s32 q10, d16, d8 @ += kSinTab1[k] * outre2Q16[k] vmlsl.s32 q10, d16, d8 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q11, d17, d9 @ += kSinTab1[k] * outre2Q16[k] vmlsl.s32 q11, d17, d9 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q12, d18, d10 @ += kSinTab1[k] * outre2Q16[k] vmlsl.s32 q12, d18, d10 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q13, d19, d11 @ += kSinTab1[k] * outre2Q16[k] vmlsl.s32 q13, d19, d11 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vrshrn.s64 d20, q10, #10 @ xrQ16 vrshrn.s64 d20, q10, #10 @ xrQ16
vrshrn.s64 d21, q11, #10 @ xrQ16 vrshrn.s64 d21, q11, #10 @ xrQ16
vrshrn.s64 d22, q12, #10 @ xrQ16 vrshrn.s64 d22, q12, #10 @ xrQ16
vrshrn.s64 d23, q13, #10 @ xrQ16 vrshrn.s64 d23, q13, #10 @ xrQ16
vmull.s32 q12, d12, d8 @ kCosTab1[k] * outre2Q16[k] vmull.s32 q12, d12, d8 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmull.s32 q13, d13, d9 @ kCosTab1[k] * outre2Q16[k] vmull.s32 q13, d13, d9 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmull.s32 q14, d14, d10 @ kCosTab1[k] * outre2Q16[k] vmull.s32 q14, d14, d10 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmull.s32 q15, d15, d11 @ kCosTab1[k] * outre2Q16[k] vmull.s32 q15, d15, d11 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmlal.s32 q12, d16, d4 @ += kSinTab1[k] * outre1Q16[k] vmlal.s32 q12, d16, d4 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vmlal.s32 q13, d17, d5 @ += kSinTab1[k] * outre1Q16[k] vmlal.s32 q13, d17, d5 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vmlal.s32 q14, d18, d6 @ += kSinTab1[k] * outre1Q16[k] vmlal.s32 q14, d18, d6 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vmlal.s32 q15, d19, d7 @ += kSinTab1[k] * outre1Q16[k] vmlal.s32 q15, d19, d7 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vdup.s32 q4, r0 @ generic -> Neon doesn't cost extra cycles. vdup.s32 q4, r0 @ generic -> Neon doesn't cost extra cycles.
@@ -622,4 +625,3 @@ DemodulateAndSeparate:
add sp, sp, #16 add sp, sp, #16
vpop {q4-q7} vpop {q4-q7}
pop {r4-r11,pc} pop {r4-r11,pc}

View File

@@ -27,31 +27,21 @@ FILE *dfile;
FILE *testfile; FILE *testfile;
#endif #endif
#ifdef AECM_SHORT // Square root of Hanning window in Q14.
#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
// Square root of Hanning window in Q14 // Table is defined in an ARM assembly file.
const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] = extern const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END;
{
0, 804, 1606, 2404, 3196, 3981, 4756, 5520,
6270, 7005, 7723, 8423, 9102, 9760, 10394, 11003,
11585, 12140, 12665, 13160, 13623, 14053, 14449, 14811,
15137, 15426, 15679, 15893, 16069, 16207, 16305, 16364,
16384
};
#else #else
static const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
// Square root of Hanning window in Q14 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
{ 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, 8364, 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, 11795, 12068, 12335, 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
15079, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
16286, 16325, 16354, 16373, 16384
}; };
#endif #endif
//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation //Q15 alpha = 0.99439986968132 const Factor for magnitude approximation

View File

@@ -25,8 +25,6 @@
#define ALIGN8_END __attribute__((aligned(8))) #define ALIGN8_END __attribute__((aligned(8)))
#endif #endif
extern const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END;
typedef struct { typedef struct {
WebRtc_Word16 real; WebRtc_Word16 real;
WebRtc_Word16 imag; WebRtc_Word16 imag;

View File

@@ -16,8 +16,7 @@
#include "aecm_core_neon_offsets.h" #include "aecm_core_neon_offsets.h"
#include "webrtc/system_wrappers/interface/asm_defines.h" #include "webrtc/system_wrappers/interface/asm_defines.h"
.extern WebRtcAecm_kSqrtHanning GLOBAL_LABEL WebRtcAecm_kSqrtHanning
GLOBAL_FUNCTION WebRtcAecm_WindowAndFFTNeon GLOBAL_FUNCTION WebRtcAecm_WindowAndFFTNeon
GLOBAL_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon GLOBAL_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
@@ -38,7 +37,7 @@ DEFINE_FUNCTION WebRtcAecm_WindowAndFFTNeon
vmov.i16 d21, #0 @ For imaginary parts of |fft|. vmov.i16 d21, #0 @ For imaginary parts of |fft|.
vmov.i16 d27, #0 @ For imaginary parts of |fft|. vmov.i16 d27, #0 @ For imaginary parts of |fft|.
ldr r5, =WebRtcAecm_kSqrtHanning adr r5, WebRtcAecm_kSqrtHanning
adr lr, kSqrtHanningReversed adr lr, kSqrtHanningReversed
add r4, r1, #(PART_LEN2 * 2) @ &fft[PART_LEN2] add r4, r1, #(PART_LEN2 * 2) @ &fft[PART_LEN2]
add r12, r2, #(PART_LEN * 2) @ time_signal[PART_LEN] add r12, r2, #(PART_LEN * 2) @ time_signal[PART_LEN]
@@ -133,7 +132,7 @@ LOOP_PRE_IFFT:
ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0] ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0]
adr r12, kSqrtHanningReversed adr r12, kSqrtHanningReversed
ldr r6, =WebRtcAecm_kSqrtHanning adr r6, WebRtcAecm_kSqrtHanning
rsb r0, r2, r0 @ outCFFT - aecm->dfaCleanQDomain rsb r0, r2, r0 @ outCFFT - aecm->dfaCleanQDomain
vdup.32 q9, r0 vdup.32 q9, r0
add r0, r4, #(PART_LEN * 4) @ &efw[PART_LEN] add r0, r4, #(PART_LEN * 4) @ &efw[PART_LEN]
@@ -321,8 +320,21 @@ LOOP_RESET_ADAPTIVE_CHANNEL:
bx lr bx lr
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning, @ Square root of Hanning window in Q14.
@ the order was reversed and one useless element (0) was removed. .align 3
WebRtcAecm_kSqrtHanning:
.short 0
.short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
.short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
.short 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040
.short 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514
.short 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553
.short 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079
.short 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034
.short 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
@ the order was reversed and one element (0) was removed.
.align 3 .align 3
kSqrtHanningReversed: kSqrtHanningReversed:
.short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947 .short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947

View File

@@ -19,23 +19,28 @@
// generating script and makefile, to replace these C functions. // generating script and makefile, to replace these C functions.
// Square root of Hanning window in Q14. // Square root of Hanning window in Q14.
static const WebRtc_Word16 kSqrtHanningReversed[] ALIGN8_END = { const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
16384, 16373, 16354, 16325, 0,
16286, 16237, 16179, 16111, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
16034, 15947, 15851, 15746, 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
15631, 15506, 15373, 15231, 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
15079, 14918, 14749, 14571, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
14384, 14189, 13985, 13773, 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13553, 13325, 13089, 12845, 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
12594, 12335, 12068, 11795, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
11514, 11227, 10933, 10633, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
10326, 10013, 9695, 9370, };
9040, 8705, 8364, 8019,
7668, 7313, 6954, 6591, // Square root of Hanning window in Q14, in reversed order.
6224, 5853, 5478, 5101, static const ALIGN8_BEG WebRtc_Word16 kSqrtHanningReversed[] ALIGN8_END = {
4720, 4337, 3951, 3562, 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111,
3172, 2780, 2386, 1990, 16034, 15947, 15851, 15746, 15631, 15506, 15373, 15231,
1594, 1196, 798, 399 15079, 14918, 14749, 14571, 14384, 14189, 13985, 13773,
13553, 13325, 13089, 12845, 12594, 12335, 12068, 11795,
11514, 11227, 10933, 10633, 10326, 10013, 9695, 9370,
9040, 8705, 8364, 8019, 7668, 7313, 6954, 6591,
6224, 5853, 5478, 5101, 4720, 4337, 3951, 3562,
3172, 2780, 2386, 1990, 1594, 1196, 798, 399
}; };
void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm, void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,

View File

@@ -13,18 +13,11 @@
#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */ #define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */
/* #define AECM_SHORT For 32 sample partition length. */
/* Algorithm parameters */ /* Algorithm parameters */
#define FRAME_LEN 80 /* Total frame length, 10 ms. */ #define FRAME_LEN 80 /* Total frame length, 10 ms. */
#ifdef AECM_SHORT
#define PART_LEN 32 /* Length of partition. */
#define PART_LEN_SHIFT 6 /* Length of (PART_LEN * 2) in base 2. */
#else
#define PART_LEN 64 /* Length of partition. */ #define PART_LEN 64 /* Length of partition. */
#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */ #define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */
#endif
#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */ #define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */
#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */ #define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */
@@ -33,11 +26,7 @@
#define MAX_DELAY 100 #define MAX_DELAY 100
/* Counter parameters */ /* Counter parameters */
#ifdef AECM_SHORT
#define CONV_LEN 1024 /* Convergence length used at startup. */
#else
#define CONV_LEN 512 /* Convergence length used at startup. */ #define CONV_LEN 512 /* Convergence length used at startup. */
#endif
#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */ #define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */
/* Energy parameters */ /* Energy parameters */

View File

@@ -20,48 +20,56 @@
#include "cpu_features_wrapper.h" #include "cpu_features_wrapper.h"
#include "nsx_core.h" #include "nsx_core.h"
// Skip first frequency bins during estimation. (0 <= value < 64) #if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
static const int kStartBand = 5; /* Tables are defined in ARM assembly files. */
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
// Constants to compensate for shifting signal log(2^shifts). extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
const WebRtc_Word16 WebRtcNsx_kLogTable[9] = { extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
#else
static const WebRtc_Word16 WebRtcNsx_kLogTable[9] = {
0, 177, 355, 532, 710, 887, 1065, 1242, 1420 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
}; };
const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = { static const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = {
32767, 16384, 10923, 8192, 6554, 5461, 4681, 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
607, 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, 468, 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315, 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278, 275, 273, 271, 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248, 246, 245, 243, 241, 239, 237, 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
236, 234, 232, 231, 229, 228, 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
210, 209, 207, 206, 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
}; };
const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = { static const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = {
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
179, 180, 181, 182, 183, 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
193, 194, 195, 196, 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
207, 208, 208, 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
220, 220, 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231, 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
232, 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
}; };
#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
// Skip first frequency bins during estimation. (0 <= value < 64)
static const int kStartBand = 5;
static const WebRtc_Word16 kPowTableFrac[1024] = { static const WebRtc_Word16 kPowTableFrac[1024] = {
0, 1, 1, 2, 3, 3, 4, 5, 0, 1, 1, 2, 3, 3, 4, 5,

View File

@@ -227,10 +227,6 @@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff); void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
#endif #endif
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -22,6 +22,48 @@ GLOBAL_FUNCTION WebRtcNsx_SynthesisUpdateNeon
GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon
GLOBAL_FUNCTION WebRtcNsx_DenormalizeNeon GLOBAL_FUNCTION WebRtcNsx_DenormalizeNeon
GLOBAL_FUNCTION WebRtcNsx_CreateComplexBufferNeon GLOBAL_FUNCTION WebRtcNsx_CreateComplexBufferNeon
GLOBAL_LABEL WebRtcNsx_kLogTable
GLOBAL_LABEL WebRtcNsx_kCounterDiv
GLOBAL_LABEL WebRtcNsx_kLogTableFrac
WebRtcNsx_kLogTableFrac:
.short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26
.short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50
.short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72
.short 73, 74, 75, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93
.short 94, 95, 96, 97, 98, 99, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110
.short 111, 112, 113, 114, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
.short 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141
.short 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 155
.short 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 169
.short 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, 179, 180, 181, 182, 183
.short 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, 195, 196
.short 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, 207, 208, 208
.short 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, 220, 220
.short 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231, 232
.short 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244
.short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255
.short 255
WebRtcNsx_kCounterDiv:
.short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979
.short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489
.short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964
.short 936, 910, 886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683
.short 669, 655, 643, 630, 618, 607, 596, 585, 575, 565, 555, 546, 537, 529
.short 520, 512, 504, 496, 489, 482, 475, 468, 462, 455, 449, 443, 437, 431
.short 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, 372, 368, 364
.short 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315
.short 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278
.short 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248
.short 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, 226, 224
.short 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, 205
.short 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188
.short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174
.short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
WebRtcNsx_kLogTable:
.short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
@ void NoiseEstimationNeon(NsxInst_t* inst, @ void NoiseEstimationNeon(NsxInst_t* inst,
@ uint16_t* magn, @ uint16_t* magn,
@@ -56,7 +98,7 @@ DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
movw r4, #offset_nsx_normData movw r4, #offset_nsx_normData
ldr r2, [r0, #offset_nsx_stages] @ inst->stages ldr r2, [r0, #offset_nsx_stages] @ inst->stages
ldr r4, [r0, r4] @ inst->normData ldr r4, [r0, r4] @ inst->normData
ldr r12, =WebRtcNsx_kLogTable adr r12, WebRtcNsx_kLogTable
subs r3, r2, r4 @ tabind = inst->stages - inst->normData; subs r3, r2, r4 @ tabind = inst->stages - inst->normData;
ldr r5, [r0, #offset_nsx_magnLen] @ magnLen ldr r5, [r0, #offset_nsx_magnLen] @ magnLen
rsblt r3, #0 rsblt r3, #0
@@ -67,7 +109,7 @@ DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
str r3, [sp] str r3, [sp]
vdup.16 q15, r3 vdup.16 q15, r3
ldr r9, =WebRtcNsx_kLogTableFrac adr r9, WebRtcNsx_kLogTableFrac
LOOP_SET_LMAGN: LOOP_SET_LMAGN:
ldrh r2, [r1], #2 @ magn[i] ldrh r2, [r1], #2 @ magn[i]
@@ -114,7 +156,7 @@ CHECK_LMAGN_COUNTER:
LOOP_SIMULT: LOOP_SIMULT:
ldrsh r1, [r10] @ inst->noiseEstCounter[s] ldrsh r1, [r10] @ inst->noiseEstCounter[s]
ldr r3, =WebRtcNsx_kCounterDiv adr r3, WebRtcNsx_kCounterDiv
mov r11, r1, lsl #1 @ counter mov r11, r1, lsl #1 @ counter
ldrh r11, [r3, r11] @ countDiv = WebRtcNsx_kCounterDiv[counter]; ldrh r11, [r3, r11] @ countDiv = WebRtcNsx_kCounterDiv[counter];
sub r12, r6, #1 @ Loop counter. sub r12, r6, #1 @ Loop counter.

View File

@@ -13,6 +13,48 @@
#include <arm_neon.h> #include <arm_neon.h>
#include <assert.h> #include <assert.h>
// Constants to compensate for shifting signal log(2^shifts).
const WebRtc_Word16 WebRtcNsx_kLogTable[9] = {
0, 177, 355, 532, 710, 887, 1065, 1242, 1420
};
const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = {
32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
};
const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = {
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
};
// Update the noise estimation information. // Update the noise estimation information.
static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) { static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) {
const int16_t kExp2Const = 11819; // Q13 const int16_t kExp2Const = 11819; // Q13

View File

@@ -27,6 +27,9 @@ _\name:
.macro CALL_FUNCTION name .macro CALL_FUNCTION name
bl _\name bl _\name
.endm .endm
.macro GLOBAL_LABEL name
.global _\name
.endm
#else #else
.macro GLOBAL_FUNCTION name .macro GLOBAL_FUNCTION name
.global \name .global \name
@@ -37,6 +40,9 @@ bl _\name
.macro CALL_FUNCTION name .macro CALL_FUNCTION name
bl \name bl \name
.endm .endm
.macro GLOBAL_LABEL name
.global \name
.endm
#endif #endif
.text .text