Fixed text relocation code related to ARM assembly code.

Refer to WebRTC issue 1300.
Review URL: https://webrtc-codereview.appspot.com/1055004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3409 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org
2013-01-25 03:18:05 +00:00
parent e8482f0e9f
commit 16d540eff1
12 changed files with 278 additions and 180 deletions

View File

@@ -15,14 +15,21 @@
*
*/
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
/* Tables are defined in ARM assembly files. */
/* Cosine table 1 in Q14 */
const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
extern const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2];
/* Sine table 1 in Q14 */
extern const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2];
/* Sine table 2 in Q14 */
extern const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4];
#else
/* Cosine table 1 in Q14 */
static const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = {
16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
@@ -41,17 +48,17 @@ const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
-6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006,
-8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803,
-9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433,
-11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, -12867,
-12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, -13970, -14081,
-14189, -14295, -14399, -14500, -14598, -14694, -14788, -14879, -14968, -15053,
-15137, -15218, -15296, -15371, -15444, -15515, -15582, -15647, -15709, -15769,
-15826, -15880, -15931, -15980, -16026, -16069, -16110, -16147, -16182, -16214,
-16244, -16270, -16294, -16315, -16333, -16349, -16362, -16371, -16378, -16383
-11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733,
-12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856,
-13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788,
-14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515,
-15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026,
-16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315,
-16333, -16349, -16362, -16371, -16378, -16383
};
/* Sine table 1 in Q14 */
const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
static const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = {
0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926,
2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033,
4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071,
@@ -80,7 +87,7 @@ const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
/* Sine table 2 in Q14 */
const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
static const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = {
16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853,
15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178,
@@ -94,6 +101,7 @@ const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245,
2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
};
#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
// Declare function pointers.
Spec2Time WebRtcIsacfix_Spec2Time;
@@ -115,8 +123,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
/* Multiply with complex exponentials and combine into one complex vector */
factQ19 = 16921; // 0.5/sqrt(240) in Q19 is round(.5/sqrt(240)*(2^19)) = 16921
for (k = 0; k < FRAMESAMPLES/2; k++) {
tmp1rQ14 = kCosTab1[k];
tmp1iQ14 = kSinTab1[k];
tmp1rQ14 = WebRtcIsacfix_kCosTab1[k];
tmp1iQ14 = WebRtcIsacfix_kSinTab1[k];
xrQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre1Q9[k]) + WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre2Q9[k]), 7);
xiQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre2Q9[k]) - WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre1Q9[k]), 7);
tmpreQ16[k] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(factQ19, xrQ16)+4, 3); // (Q16*Q19>>16)>>3 = Q16
@@ -171,8 +179,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
yiQ16 = -tmpreQ16[k] + tmpreQ16[FRAMESAMPLES/2 - 1 - k];
xiQ16 = tmpimQ16[k] - tmpimQ16[FRAMESAMPLES/2 - 1 - k];
yrQ16 = tmpimQ16[k] + tmpimQ16[FRAMESAMPLES/2 - 1 - k];
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k];
tmp1iQ14 = kSinTab2[k];
tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k];
tmp1iQ14 = WebRtcIsacfix_kSinTab2[k];
v1Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xrQ16) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xiQ16);
v2Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xrQ16) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xiQ16);
outreQ7[k] = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(v1Q16, 9);
@@ -198,8 +206,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
for (k = 0; k < FRAMESAMPLES/4; k++) {
/* Move zero in time to beginning of frames */
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k];
tmp1iQ14 = kSinTab2[k];
tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k];
tmp1iQ14 = WebRtcIsacfix_kSinTab2[k];
tmpInRe = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inreQ7[k], 9); // Q7 -> Q16
tmpInIm = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inimQ7[k], 9); // Q7 -> Q16
@@ -270,8 +278,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
/* Demodulate and separate */
factQ11 = 31727; // sqrt(240) in Q11 is round(15.49193338482967*2048) = 31727
for (k = 0; k < FRAMESAMPLES/2; k++) {
tmp1rQ14 = kCosTab1[k];
tmp1iQ14 = kSinTab1[k];
tmp1rQ14 = WebRtcIsacfix_kCosTab1[k];
tmp1iQ14 = WebRtcIsacfix_kSinTab1[k];
xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre1Q16[k]) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre2Q16[k]);
xiQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre2Q16[k]) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre1Q16[k]);
xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16);

View File

@@ -16,6 +16,9 @@
GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon
GLOBAL_FUNCTION WebRtcIsacfix_Time2SpecNeon
GLOBAL_LABEL WebRtcIsacfix_kSinTab1
GLOBAL_LABEL WebRtcIsacfix_kCosTab1
GLOBAL_LABEL WebRtcIsacfix_kSinTab2
@ void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9,
@ WebRtc_Word16* inre2Q9,
@@ -39,9 +42,9 @@ DEFINE_FUNCTION WebRtcIsacfix_Time2SpecNeon
add r4, sp, #16 @ tmpreQ16;
add r5, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16;
adr r9, kCosTab1
mov r6, #(kSinTab1 - kCosTab1)
add r10, r9, r6 @ kSinTab1
adr r9, WebRtcIsacfix_kCosTab1
mov r6, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1)
add r10, r9, r6 @ WebRtcIsacfix_kSinTab1
vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm.
vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe.
@@ -55,18 +58,18 @@ Time2Spec_TransformAndFindMax:
subs r8, #8
vld1.16 {q0}, [r9:64]! @ kCosTab1[]
vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[]
vld1.16 {q2}, [r0]! @ inre1Q9[]
vmull.s16 q8, d0, d4 @ kCosTab1[k] * inre1Q9[k]
vld1.16 {q1}, [r10:64]! @ kSinTab1[]
vmull.s16 q9, d1, d5 @ kCosTab1[k] * inre1Q9[k]
vmull.s16 q8, d0, d4 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k]
vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[]
vmull.s16 q9, d1, d5 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k]
vld1.16 {q3}, [r1]! @ inre2Q9[]
vmlal.s16 q8, d2, d6 @ kSinTab1[k] * inre2Q9[k]
vmlal.s16 q9, d3, d7 @ kSinTab1[k] * inre2Q9[k]
vmull.s16 q12, d0, d6 @ kCosTab1[k] * inre2Q9[k]
vmull.s16 q13, d1, d7 @ kCosTab1[k] * inre2Q9[k]
vmlsl.s16 q12, d2, d4 @ kSinTab1[k] * inre1Q9[k]
vmlsl.s16 q13, d3, d5 @ kSinTab1[k] * inre1Q9[k]
vmlal.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k]
vmlal.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k]
vmull.s16 q12, d0, d6 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k]
vmull.s16 q13, d1, d7 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k]
vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k]
vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k]
vqdmulh.s32 q0, q8, q4 @ xrQ16 * factQ19
vqdmulh.s32 q1, q9, q4 @ xrQ16 * factQ19
@@ -153,9 +156,9 @@ Time2Spec_PreFftShift:
ldr r7, [sp, #4] @ inre2Q9
add r4, r6, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 4]
add r5, r7, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 4]
adr r10, kSinTab2
adr r10, WebRtcIsacfix_kSinTab2
add r9, r10, #(120*2 - 8) @ &kSinTab2[119 - 4]
add r9, r10, #(120*2 - 8) @ &WebRtcIsacfix_kSinTab2[119 - 4]
mov r8, #(FRAMESAMPLES / 4) @ loop counter
@@ -170,9 +173,9 @@ Time2Spec_PostFftTransform:
@ By vshl, we effectively did "<< (-sh - 23)", instead of "<< (-sh)",
@ ">> 14" and then ">> 9" as in the C code.
vld1.16 {d6}, [r9:64] @ kCosTab2[]
vld1.16 {d6}, [r9, :64] @ kCosTab2[]
vneg.s16 d6, d6
vld1.16 {d7}, [r10:64]! @ kSinTab2[]
vld1.16 {d7}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[]
vrev64.16 q1, q1 @ Reverse samples in 2nd half of xrQ16[].
vqadd.s16 d4, d0, d2 @ xrQ16
vqsub.s16 d5, d1, d3 @ xiQ16
@@ -187,12 +190,12 @@ Time2Spec_PostFftTransform:
vqsub.s16 d0, d2, d0 @ yiQ16
vmull.s16 q12, d6, d4 @ kCosTab2[k] * xrQ16
vmlsl.s16 q12, d7, d5 @ kSinTab2[k] * xiQ16
vmull.s16 q13, d7, d4 @ kSinTab2[k] * xrQ16
vmlsl.s16 q12, d7, d5 @ WebRtcIsacfix_kSinTab2[k] * xiQ16
vmull.s16 q13, d7, d4 @ WebRtcIsacfix_kSinTab2[k] * xrQ16
vmlal.s16 q13, d6, d5 @ kCosTab2[k] * xiQ16
vmull.s16 q6, d7, d1 @ kSinTab2[k] * yrQ16
vmull.s16 q6, d7, d1 @ WebRtcIsacfix_kSinTab2[k] * yrQ16
vmlal.s16 q6, d6, d0 @ kCosTab2[k] * yiQ16
vmull.s16 q7, d7, d0 @ kSinTab2[k] * yiQ16
vmull.s16 q7, d7, d0 @ WebRtcIsacfix_kSinTab2[k] * yiQ16
vmlsl.s16 q7, d6, d1 @ kCosTab2[k] * yrQ16
vshl.s32 q12, q12, q15
@@ -225,7 +228,7 @@ Time2Spec_PostFftTransform:
.align 8
@ Cosine table 1 in Q14
kCosTab1:
WebRtcIsacfix_kCosTab1:
.short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315
.short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069
.short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647
@@ -259,7 +262,7 @@ kCosTab1:
.align 8
@ Sine table 2 in Q14
kSinTab2:
WebRtcIsacfix_kSinTab2:
.short 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305
.short 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048
.short 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615
@@ -280,7 +283,7 @@ kSinTab2:
.align 8
@ Sine table 1 in Q14
kSinTab1:
WebRtcIsacfix_kSinTab1:
.short 0, 214, 429, 643, 857, 1072, 1285, 1499
.short 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196
.short 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859
@@ -334,8 +337,8 @@ DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon
add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES / 2 - 8]
mov r8, #(FRAMESAMPLES / 2) @ loop counter
adr r10, kSinTab2
add r9, r10, #(120*2 - 16) @ &kSinTab2[119 - 8]
adr r10, WebRtcIsacfix_kSinTab2
add r9, r10, #(120*2 - 16) @ &WebRtcIsacfix_kSinTab2[119 - 8]
mov r5, #-32
@@ -349,24 +352,24 @@ TransformAndFindMax:
subs r8, #16
vld1.16 {q0}, [r9:64] @ kCosTab2[]
vld1.16 {q0}, [r9, :64] @ kCosTab2[]
sub r9, #16
vld1.16 {q2}, [r0]! @ inreQ7[]
vneg.s16 q0, q0
vld1.16 {q3}, [r1]! @ inimQ7[]
vrev64.16 d0, d0
vrev64.16 d1, d1
vld1.16 {q1}, [r10:64]! @ kSinTab2[]
vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[]
vswp d0, d1
vmull.s16 q8, d2, d6 @ kSinTab2[k] * inimQ7[k]
vmull.s16 q9, d3, d7 @ kSinTab2[k] * inimQ7[k]
vmull.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmull.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmlal.s16 q8, d0, d4 @ kCosTab2[k] * inreQ7[k]
vmlal.s16 q9, d1, d5 @ kCosTab2[k] * inreQ7[k]
vmull.s16 q12, d0, d6 @ kCosTab2[k] * inimQ7[k]
vmull.s16 q13, d1, d7 @ kCosTab2[k] * inimQ7[k]
vmlsl.s16 q12, d2, d4 @ kSinTab2[k] * inreQ7[k]
vmlsl.s16 q13, d3, d5 @ kSinTab2[k] * inreQ7[k]
vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k]
vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k]
vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES / 2 - 8 + i]
vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES / 2 - 8 + i]
@@ -374,15 +377,15 @@ TransformAndFindMax:
vrev64.16 q2, q2 @ Reverse the order of the samples
vrev64.16 q3, q3 @ Reverse the order of the samples
vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[k]
vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[k]
vmlsl.s16 q14, d0, d7 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k]
vmlsl.s16 q15, d1, d6 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k]
vmull.s16 q14, d2, d5 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmull.s16 q15, d3, d4 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
vmlsl.s16 q14, d0, d7 @ q14 -= kCosTab2[k] * inreQ7[k]
vmlsl.s16 q15, d1, d6 @ q15 -= kCosTab2[k] * inreQ7[k]
vmull.s16 q10, d0, d5 @ kCosTab2[k] * inimQ7[]
vmull.s16 q11, d1, d4 @ kCosTab2[k] * inimQ7[]
vmlal.s16 q10, d2, d7 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
vmlal.s16 q11, d3, d6 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
vmlal.s16 q10, d2, d7 @ q10 += WebRtcIsacfix_kSinTab2[k] * inreQ7[]
vmlal.s16 q11, d3, d6 @ q11 += WebRtcIsacfix_kSinTab2[k] * inreQ7[]
vshr.s32 q8, q8, #5 @ xrQ16
vshr.s32 q9, q9, #5 @ xrQ16
@@ -448,9 +451,9 @@ TransformAndFindMax:
bgt TransformAndFindMax
adr r10, kSinTab1
mov r2, #(kSinTab1 - kCosTab1)
sub r9, r10, r2 @ kCosTab1
adr r10, WebRtcIsacfix_kSinTab1
mov r2, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1)
sub r9, r10, r2 @ WebRtcIsacfix_kCosTab1
@ Find the maximum value in the Neon registers
vmax.u32 d12, d13
@@ -565,39 +568,39 @@ PostFftShiftDivide:
DemodulateAndSeparate:
subs r8, #8
vld1.16 {q0}, [r9:64]! @ kCosTab1[]
vmovl.s16 q6, d0 @ kCosTab1[]
vld1.16 {q1}, [r10:64]! @ kSinTab1[]
vmovl.s16 q7, d1 @ kCosTab1[]
vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[]
vmovl.s16 q6, d0 @ WebRtcIsacfix_kCosTab1[]
vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[]
vmovl.s16 q7, d1 @ WebRtcIsacfix_kCosTab1[]
vld1.32 {q2, q3}, [r2] @ outre1Q16
vmovl.s16 q8, d2 @ kSinTab1[]
vmovl.s16 q8, d2 @ WebRtcIsacfix_kSinTab1[]
vld1.32 {q4, q5}, [r3] @ outre2Q16
vmovl.s16 q9, d3 @ kSinTab1[]
vmovl.s16 q9, d3 @ WebRtcIsacfix_kSinTab1[]
vmull.s32 q10, d12, d4 @ kCosTab1[k] * outre1Q16[k]
vmull.s32 q11, d13, d5 @ kCosTab1[k] * outre1Q16[k]
vmull.s32 q12, d14, d6 @ kCosTab1[k] * outre1Q16[k]
vmull.s32 q13, d15, d7 @ kCosTab1[k] * outre1Q16[k]
vmull.s32 q10, d12, d4 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmull.s32 q11, d13, d5 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmull.s32 q12, d14, d6 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmull.s32 q13, d15, d7 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
vmlsl.s32 q10, d16, d8 @ += kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q11, d17, d9 @ += kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q12, d18, d10 @ += kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q13, d19, d11 @ += kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q10, d16, d8 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q11, d17, d9 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q12, d18, d10 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vmlsl.s32 q13, d19, d11 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
vrshrn.s64 d20, q10, #10 @ xrQ16
vrshrn.s64 d21, q11, #10 @ xrQ16
vrshrn.s64 d22, q12, #10 @ xrQ16
vrshrn.s64 d23, q13, #10 @ xrQ16
vmull.s32 q12, d12, d8 @ kCosTab1[k] * outre2Q16[k]
vmull.s32 q13, d13, d9 @ kCosTab1[k] * outre2Q16[k]
vmull.s32 q14, d14, d10 @ kCosTab1[k] * outre2Q16[k]
vmull.s32 q15, d15, d11 @ kCosTab1[k] * outre2Q16[k]
vmull.s32 q12, d12, d8 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmull.s32 q13, d13, d9 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmull.s32 q14, d14, d10 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmull.s32 q15, d15, d11 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
vmlal.s32 q12, d16, d4 @ += kSinTab1[k] * outre1Q16[k]
vmlal.s32 q13, d17, d5 @ += kSinTab1[k] * outre1Q16[k]
vmlal.s32 q14, d18, d6 @ += kSinTab1[k] * outre1Q16[k]
vmlal.s32 q15, d19, d7 @ += kSinTab1[k] * outre1Q16[k]
vmlal.s32 q12, d16, d4 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vmlal.s32 q13, d17, d5 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vmlal.s32 q14, d18, d6 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vmlal.s32 q15, d19, d7 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
vdup.s32 q4, r0 @ generic -> Neon doesn't cost extra cycles.
@@ -622,4 +625,3 @@ DemodulateAndSeparate:
add sp, sp, #16
vpop {q4-q7}
pop {r4-r11,pc}

View File

@@ -27,31 +27,21 @@ FILE *dfile;
FILE *testfile;
#endif
#ifdef AECM_SHORT
// Square root of Hanning window in Q14
const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] =
{
0, 804, 1606, 2404, 3196, 3981, 4756, 5520,
6270, 7005, 7723, 8423, 9102, 9760, 10394, 11003,
11585, 12140, 12665, 13160, 13623, 14053, 14449, 14811,
15137, 15426, 15679, 15893, 16069, 16207, 16305, 16364,
16384
};
// Square root of Hanning window in Q14.
#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
// Table is defined in an ARM assembly file.
extern const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END;
#else
// Square root of Hanning window in Q14
const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END =
{
static const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, 8364,
8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, 11795, 12068, 12335,
12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918,
15079, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237,
16286, 16325, 16354, 16373, 16384
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};
#endif
//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation

View File

@@ -25,8 +25,6 @@
#define ALIGN8_END __attribute__((aligned(8)))
#endif
extern const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END;
typedef struct {
WebRtc_Word16 real;
WebRtc_Word16 imag;

View File

@@ -16,8 +16,7 @@
#include "aecm_core_neon_offsets.h"
#include "webrtc/system_wrappers/interface/asm_defines.h"
.extern WebRtcAecm_kSqrtHanning
GLOBAL_LABEL WebRtcAecm_kSqrtHanning
GLOBAL_FUNCTION WebRtcAecm_WindowAndFFTNeon
GLOBAL_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
@@ -38,7 +37,7 @@ DEFINE_FUNCTION WebRtcAecm_WindowAndFFTNeon
vmov.i16 d21, #0 @ For imaginary parts of |fft|.
vmov.i16 d27, #0 @ For imaginary parts of |fft|.
ldr r5, =WebRtcAecm_kSqrtHanning
adr r5, WebRtcAecm_kSqrtHanning
adr lr, kSqrtHanningReversed
add r4, r1, #(PART_LEN2 * 2) @ &fft[PART_LEN2]
add r12, r2, #(PART_LEN * 2) @ time_signal[PART_LEN]
@@ -133,7 +132,7 @@ LOOP_PRE_IFFT:
ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0]
adr r12, kSqrtHanningReversed
ldr r6, =WebRtcAecm_kSqrtHanning
adr r6, WebRtcAecm_kSqrtHanning
rsb r0, r2, r0 @ outCFFT - aecm->dfaCleanQDomain
vdup.32 q9, r0
add r0, r4, #(PART_LEN * 4) @ &efw[PART_LEN]
@@ -321,8 +320,21 @@ LOOP_RESET_ADAPTIVE_CHANNEL:
bx lr
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
@ the order was reversed and one useless element (0) was removed.
@ Square root of Hanning window in Q14.
.align 3
WebRtcAecm_kSqrtHanning:
.short 0
.short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
.short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
.short 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040
.short 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514
.short 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553
.short 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079
.short 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034
.short 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
@ the order was reversed and one element (0) was removed.
.align 3
kSqrtHanningReversed:
.short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947

View File

@@ -19,23 +19,28 @@
// generating script and makefile, to replace these C functions.
// Square root of Hanning window in Q14.
static const WebRtc_Word16 kSqrtHanningReversed[] ALIGN8_END = {
16384, 16373, 16354, 16325,
16286, 16237, 16179, 16111,
16034, 15947, 15851, 15746,
15631, 15506, 15373, 15231,
15079, 14918, 14749, 14571,
14384, 14189, 13985, 13773,
13553, 13325, 13089, 12845,
12594, 12335, 12068, 11795,
11514, 11227, 10933, 10633,
10326, 10013, 9695, 9370,
9040, 8705, 8364, 8019,
7668, 7313, 6954, 6591,
6224, 5853, 5478, 5101,
4720, 4337, 3951, 3562,
3172, 2780, 2386, 1990,
1594, 1196, 798, 399
const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
0,
399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};
// Square root of Hanning window in Q14, in reversed order.
static const ALIGN8_BEG WebRtc_Word16 kSqrtHanningReversed[] ALIGN8_END = {
16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111,
16034, 15947, 15851, 15746, 15631, 15506, 15373, 15231,
15079, 14918, 14749, 14571, 14384, 14189, 13985, 13773,
13553, 13325, 13089, 12845, 12594, 12335, 12068, 11795,
11514, 11227, 10933, 10633, 10326, 10013, 9695, 9370,
9040, 8705, 8364, 8019, 7668, 7313, 6954, 6591,
6224, 5853, 5478, 5101, 4720, 4337, 3951, 3562,
3172, 2780, 2386, 1990, 1594, 1196, 798, 399
};
void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,

View File

@@ -13,18 +13,11 @@
#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */
/* #define AECM_SHORT For 32 sample partition length. */
/* Algorithm parameters */
#define FRAME_LEN 80 /* Total frame length, 10 ms. */
#ifdef AECM_SHORT
#define PART_LEN 32 /* Length of partition. */
#define PART_LEN_SHIFT 6 /* Length of (PART_LEN * 2) in base 2. */
#else
#define PART_LEN 64 /* Length of partition. */
#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */
#endif
#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */
#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */
@@ -33,11 +26,7 @@
#define MAX_DELAY 100
/* Counter parameters */
#ifdef AECM_SHORT
#define CONV_LEN 1024 /* Convergence length used at startup. */
#else
#define CONV_LEN 512 /* Convergence length used at startup. */
#endif
#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */
/* Energy parameters */

View File

@@ -20,48 +20,56 @@
#include "cpu_features_wrapper.h"
#include "nsx_core.h"
// Skip first frequency bins during estimation. (0 <= value < 64)
static const int kStartBand = 5;
// Constants to compensate for shifting signal log(2^shifts).
const WebRtc_Word16 WebRtcNsx_kLogTable[9] = {
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
/* Tables are defined in ARM assembly files. */
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
#else
static const WebRtc_Word16 WebRtcNsx_kLogTable[9] = {
0, 177, 355, 532, 710, 887, 1065, 1242, 1420
};
const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = {
32767, 16384, 10923, 8192, 6554, 5461, 4681,
4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560,
1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910,
886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618,
607, 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, 468,
462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377,
372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315,
312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278, 275, 273, 271,
269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248, 246, 245, 243, 241, 239, 237,
236, 234, 232, 231, 229, 228, 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211,
210, 209, 207, 206, 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191,
189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
static const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = {
32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
};
const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = {
static const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = {
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117,
118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 178,
179, 180, 181, 182, 183, 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192,
193, 194, 195, 196, 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206,
207, 208, 208, 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219,
220, 220, 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231,
232, 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244,
244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
};
#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
// Skip first frequency bins during estimation. (0 <= value < 64)
static const int kStartBand = 5;
static const WebRtc_Word16 kPowTableFrac[1024] = {
0, 1, 1, 2, 3, 3, 4, 5,

View File

@@ -227,10 +227,6 @@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
#endif
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
#ifdef __cplusplus
}
#endif

View File

@@ -22,6 +22,48 @@ GLOBAL_FUNCTION WebRtcNsx_SynthesisUpdateNeon
GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon
GLOBAL_FUNCTION WebRtcNsx_DenormalizeNeon
GLOBAL_FUNCTION WebRtcNsx_CreateComplexBufferNeon
GLOBAL_LABEL WebRtcNsx_kLogTable
GLOBAL_LABEL WebRtcNsx_kCounterDiv
GLOBAL_LABEL WebRtcNsx_kLogTableFrac
WebRtcNsx_kLogTableFrac:
.short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26
.short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50
.short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72
.short 73, 74, 75, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93
.short 94, 95, 96, 97, 98, 99, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110
.short 111, 112, 113, 114, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
.short 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141
.short 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 155
.short 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 169
.short 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, 179, 180, 181, 182, 183
.short 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, 195, 196
.short 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, 207, 208, 208
.short 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, 220, 220
.short 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231, 232
.short 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244
.short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255
.short 255
WebRtcNsx_kCounterDiv:
.short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979
.short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489
.short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964
.short 936, 910, 886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683
.short 669, 655, 643, 630, 618, 607, 596, 585, 575, 565, 555, 546, 537, 529
.short 520, 512, 504, 496, 489, 482, 475, 468, 462, 455, 449, 443, 437, 431
.short 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, 372, 368, 364
.short 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315
.short 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278
.short 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248
.short 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, 226, 224
.short 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, 205
.short 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188
.short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174
.short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
WebRtcNsx_kLogTable:
.short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
@ void NoiseEstimationNeon(NsxInst_t* inst,
@ uint16_t* magn,
@@ -56,7 +98,7 @@ DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
movw r4, #offset_nsx_normData
ldr r2, [r0, #offset_nsx_stages] @ inst->stages
ldr r4, [r0, r4] @ inst->normData
ldr r12, =WebRtcNsx_kLogTable
adr r12, WebRtcNsx_kLogTable
subs r3, r2, r4 @ tabind = inst->stages - inst->normData;
ldr r5, [r0, #offset_nsx_magnLen] @ magnLen
rsblt r3, #0
@@ -67,7 +109,7 @@ DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
str r3, [sp]
vdup.16 q15, r3
ldr r9, =WebRtcNsx_kLogTableFrac
adr r9, WebRtcNsx_kLogTableFrac
LOOP_SET_LMAGN:
ldrh r2, [r1], #2 @ magn[i]
@@ -114,7 +156,7 @@ CHECK_LMAGN_COUNTER:
LOOP_SIMULT:
ldrsh r1, [r10] @ inst->noiseEstCounter[s]
ldr r3, =WebRtcNsx_kCounterDiv
adr r3, WebRtcNsx_kCounterDiv
mov r11, r1, lsl #1 @ counter
ldrh r11, [r3, r11] @ countDiv = WebRtcNsx_kCounterDiv[counter];
sub r12, r6, #1 @ Loop counter.

View File

@@ -13,6 +13,48 @@
#include <arm_neon.h>
#include <assert.h>
// Constants to compensate for shifting signal log(2^shifts).
const WebRtc_Word16 WebRtcNsx_kLogTable[9] = {
0, 177, 355, 532, 710, 887, 1065, 1242, 1420
};
const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = {
32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
};
const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = {
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
};
// Update the noise estimation information.
static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) {
const int16_t kExp2Const = 11819; // Q13

View File

@@ -27,6 +27,9 @@ _\name:
.macro CALL_FUNCTION name
bl _\name
.endm
.macro GLOBAL_LABEL name
.global _\name
.endm
#else
.macro GLOBAL_FUNCTION name
.global \name
@@ -37,6 +40,9 @@ bl _\name
.macro CALL_FUNCTION name
bl \name
.endm
.macro GLOBAL_LABEL name
.global \name
.endm
#endif
.text