Fixed the text-relocation issue related to ARM assembly code.
Refer to WebRTC issue 1300.
Review URL: https://webrtc-codereview.appspot.com/1055004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@3409 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -15,14 +15,21 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h"
|
||||
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
|
||||
|
||||
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
|
||||
/* Tables are defined in ARM assembly files. */
|
||||
/* Cosine table 1 in Q14 */
|
||||
const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
|
||||
extern const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2];
|
||||
/* Sine table 1 in Q14 */
|
||||
extern const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2];
|
||||
/* Sine table 2 in Q14 */
|
||||
extern const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4];
|
||||
#else
|
||||
/* Cosine table 1 in Q14 */
|
||||
static const WebRtc_Word16 WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = {
|
||||
16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
|
||||
16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
|
||||
15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
|
||||
@@ -41,17 +48,17 @@ const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = {
|
||||
-6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006,
|
||||
-8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803,
|
||||
-9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433,
|
||||
-11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, -12867,
|
||||
-12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, -13970, -14081,
|
||||
-14189, -14295, -14399, -14500, -14598, -14694, -14788, -14879, -14968, -15053,
|
||||
-15137, -15218, -15296, -15371, -15444, -15515, -15582, -15647, -15709, -15769,
|
||||
-15826, -15880, -15931, -15980, -16026, -16069, -16110, -16147, -16182, -16214,
|
||||
-16244, -16270, -16294, -16315, -16333, -16349, -16362, -16371, -16378, -16383
|
||||
-11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733,
|
||||
-12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856,
|
||||
-13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788,
|
||||
-14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515,
|
||||
-15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026,
|
||||
-16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315,
|
||||
-16333, -16349, -16362, -16371, -16378, -16383
|
||||
};
|
||||
|
||||
|
||||
/* Sine table 1 in Q14 */
|
||||
const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
|
||||
static const WebRtc_Word16 WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = {
|
||||
0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926,
|
||||
2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033,
|
||||
4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071,
|
||||
@@ -80,7 +87,7 @@ const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
|
||||
|
||||
|
||||
/* Sine table 2 in Q14 */
|
||||
const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
|
||||
static const WebRtc_Word16 WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = {
|
||||
16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
|
||||
16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853,
|
||||
15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178,
|
||||
@@ -94,6 +101,7 @@ const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
|
||||
4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245,
|
||||
2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
|
||||
};
|
||||
#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
|
||||
|
||||
// Declare function pointers.
|
||||
Spec2Time WebRtcIsacfix_Spec2Time;
|
||||
@@ -115,8 +123,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
|
||||
/* Multiply with complex exponentials and combine into one complex vector */
|
||||
factQ19 = 16921; // 0.5/sqrt(240) in Q19 is round(.5/sqrt(240)*(2^19)) = 16921
|
||||
for (k = 0; k < FRAMESAMPLES/2; k++) {
|
||||
tmp1rQ14 = kCosTab1[k];
|
||||
tmp1iQ14 = kSinTab1[k];
|
||||
tmp1rQ14 = WebRtcIsacfix_kCosTab1[k];
|
||||
tmp1iQ14 = WebRtcIsacfix_kSinTab1[k];
|
||||
xrQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre1Q9[k]) + WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre2Q9[k]), 7);
|
||||
xiQ16 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(tmp1rQ14, inre2Q9[k]) - WEBRTC_SPL_MUL_16_16(tmp1iQ14, inre1Q9[k]), 7);
|
||||
tmpreQ16[k] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(factQ19, xrQ16)+4, 3); // (Q16*Q19>>16)>>3 = Q16
|
||||
@@ -171,8 +179,8 @@ void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
|
||||
yiQ16 = -tmpreQ16[k] + tmpreQ16[FRAMESAMPLES/2 - 1 - k];
|
||||
xiQ16 = tmpimQ16[k] - tmpimQ16[FRAMESAMPLES/2 - 1 - k];
|
||||
yrQ16 = tmpimQ16[k] + tmpimQ16[FRAMESAMPLES/2 - 1 - k];
|
||||
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k];
|
||||
tmp1iQ14 = kSinTab2[k];
|
||||
tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k];
|
||||
tmp1iQ14 = WebRtcIsacfix_kSinTab2[k];
|
||||
v1Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xrQ16) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xiQ16);
|
||||
v2Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xrQ16) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xiQ16);
|
||||
outreQ7[k] = (WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(v1Q16, 9);
|
||||
@@ -198,8 +206,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES/4; k++) {
|
||||
/* Move zero in time to beginning of frames */
|
||||
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k];
|
||||
tmp1iQ14 = kSinTab2[k];
|
||||
tmp1rQ14 = -WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4 - 1 - k];
|
||||
tmp1iQ14 = WebRtcIsacfix_kSinTab2[k];
|
||||
|
||||
tmpInRe = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inreQ7[k], 9); // Q7 -> Q16
|
||||
tmpInIm = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inimQ7[k], 9); // Q7 -> Q16
|
||||
@@ -270,8 +278,8 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
|
||||
/* Demodulate and separate */
|
||||
factQ11 = 31727; // sqrt(240) in Q11 is round(15.49193338482967*2048) = 31727
|
||||
for (k = 0; k < FRAMESAMPLES/2; k++) {
|
||||
tmp1rQ14 = kCosTab1[k];
|
||||
tmp1iQ14 = kSinTab1[k];
|
||||
tmp1rQ14 = WebRtcIsacfix_kCosTab1[k];
|
||||
tmp1iQ14 = WebRtcIsacfix_kSinTab1[k];
|
||||
xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre1Q16[k]) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre2Q16[k]);
|
||||
xiQ16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, outre2Q16[k]) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, outre1Q16[k]);
|
||||
xrQ16 = WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16);
|
||||
|
||||
@@ -16,6 +16,9 @@
|
||||
|
||||
GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon
|
||||
GLOBAL_FUNCTION WebRtcIsacfix_Time2SpecNeon
|
||||
GLOBAL_LABEL WebRtcIsacfix_kSinTab1
|
||||
GLOBAL_LABEL WebRtcIsacfix_kCosTab1
|
||||
GLOBAL_LABEL WebRtcIsacfix_kSinTab2
|
||||
|
||||
@ void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9,
|
||||
@ WebRtc_Word16* inre2Q9,
|
||||
@@ -39,9 +42,9 @@ DEFINE_FUNCTION WebRtcIsacfix_Time2SpecNeon
|
||||
add r4, sp, #16 @ tmpreQ16;
|
||||
add r5, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16;
|
||||
|
||||
adr r9, kCosTab1
|
||||
mov r6, #(kSinTab1 - kCosTab1)
|
||||
add r10, r9, r6 @ kSinTab1
|
||||
adr r9, WebRtcIsacfix_kCosTab1
|
||||
mov r6, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1)
|
||||
add r10, r9, r6 @ WebRtcIsacfix_kSinTab1
|
||||
|
||||
vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm.
|
||||
vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe.
|
||||
@@ -55,18 +58,18 @@ Time2Spec_TransformAndFindMax:
|
||||
|
||||
subs r8, #8
|
||||
|
||||
vld1.16 {q0}, [r9:64]! @ kCosTab1[]
|
||||
vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[]
|
||||
vld1.16 {q2}, [r0]! @ inre1Q9[]
|
||||
vmull.s16 q8, d0, d4 @ kCosTab1[k] * inre1Q9[k]
|
||||
vld1.16 {q1}, [r10:64]! @ kSinTab1[]
|
||||
vmull.s16 q9, d1, d5 @ kCosTab1[k] * inre1Q9[k]
|
||||
vmull.s16 q8, d0, d4 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k]
|
||||
vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[]
|
||||
vmull.s16 q9, d1, d5 @ WebRtcIsacfix_kCosTab1[k] * inre1Q9[k]
|
||||
vld1.16 {q3}, [r1]! @ inre2Q9[]
|
||||
vmlal.s16 q8, d2, d6 @ kSinTab1[k] * inre2Q9[k]
|
||||
vmlal.s16 q9, d3, d7 @ kSinTab1[k] * inre2Q9[k]
|
||||
vmull.s16 q12, d0, d6 @ kCosTab1[k] * inre2Q9[k]
|
||||
vmull.s16 q13, d1, d7 @ kCosTab1[k] * inre2Q9[k]
|
||||
vmlsl.s16 q12, d2, d4 @ kSinTab1[k] * inre1Q9[k]
|
||||
vmlsl.s16 q13, d3, d5 @ kSinTab1[k] * inre1Q9[k]
|
||||
vmlal.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k]
|
||||
vmlal.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab1[k] * inre2Q9[k]
|
||||
vmull.s16 q12, d0, d6 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k]
|
||||
vmull.s16 q13, d1, d7 @ WebRtcIsacfix_kCosTab1[k] * inre2Q9[k]
|
||||
vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k]
|
||||
vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab1[k] * inre1Q9[k]
|
||||
|
||||
vqdmulh.s32 q0, q8, q4 @ xrQ16 * factQ19
|
||||
vqdmulh.s32 q1, q9, q4 @ xrQ16 * factQ19
|
||||
@@ -153,9 +156,9 @@ Time2Spec_PreFftShift:
|
||||
ldr r7, [sp, #4] @ inre2Q9
|
||||
add r4, r6, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 4]
|
||||
add r5, r7, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 4]
|
||||
adr r10, kSinTab2
|
||||
adr r10, WebRtcIsacfix_kSinTab2
|
||||
|
||||
add r9, r10, #(120*2 - 8) @ &kSinTab2[119 - 4]
|
||||
add r9, r10, #(120*2 - 8) @ &WebRtcIsacfix_kSinTab2[119 - 4]
|
||||
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 4) @ loop counter
|
||||
@@ -170,9 +173,9 @@ Time2Spec_PostFftTransform:
|
||||
@ By vshl, we effectively did "<< (-sh - 23)", instead of "<< (-sh)",
|
||||
@ ">> 14" and then ">> 9" as in the C code.
|
||||
|
||||
vld1.16 {d6}, [r9:64] @ kCosTab2[]
|
||||
vld1.16 {d6}, [r9, :64] @ kCosTab2[]
|
||||
vneg.s16 d6, d6
|
||||
vld1.16 {d7}, [r10:64]! @ kSinTab2[]
|
||||
vld1.16 {d7}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[]
|
||||
vrev64.16 q1, q1 @ Reverse samples in 2nd half of xrQ16[].
|
||||
vqadd.s16 d4, d0, d2 @ xrQ16
|
||||
vqsub.s16 d5, d1, d3 @ xiQ16
|
||||
@@ -187,12 +190,12 @@ Time2Spec_PostFftTransform:
|
||||
vqsub.s16 d0, d2, d0 @ yiQ16
|
||||
|
||||
vmull.s16 q12, d6, d4 @ kCosTab2[k] * xrQ16
|
||||
vmlsl.s16 q12, d7, d5 @ kSinTab2[k] * xiQ16
|
||||
vmull.s16 q13, d7, d4 @ kSinTab2[k] * xrQ16
|
||||
vmlsl.s16 q12, d7, d5 @ WebRtcIsacfix_kSinTab2[k] * xiQ16
|
||||
vmull.s16 q13, d7, d4 @ WebRtcIsacfix_kSinTab2[k] * xrQ16
|
||||
vmlal.s16 q13, d6, d5 @ kCosTab2[k] * xiQ16
|
||||
vmull.s16 q6, d7, d1 @ kSinTab2[k] * yrQ16
|
||||
vmull.s16 q6, d7, d1 @ WebRtcIsacfix_kSinTab2[k] * yrQ16
|
||||
vmlal.s16 q6, d6, d0 @ kCosTab2[k] * yiQ16
|
||||
vmull.s16 q7, d7, d0 @ kSinTab2[k] * yiQ16
|
||||
vmull.s16 q7, d7, d0 @ WebRtcIsacfix_kSinTab2[k] * yiQ16
|
||||
vmlsl.s16 q7, d6, d1 @ kCosTab2[k] * yrQ16
|
||||
|
||||
vshl.s32 q12, q12, q15
|
||||
@@ -225,7 +228,7 @@ Time2Spec_PostFftTransform:
|
||||
|
||||
.align 8
|
||||
@ Cosine table 1 in Q14
|
||||
kCosTab1:
|
||||
WebRtcIsacfix_kCosTab1:
|
||||
.short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315
|
||||
.short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069
|
||||
.short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647
|
||||
@@ -259,7 +262,7 @@ kCosTab1:
|
||||
|
||||
.align 8
|
||||
@ Sine table 2 in Q14
|
||||
kSinTab2:
|
||||
WebRtcIsacfix_kSinTab2:
|
||||
.short 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305
|
||||
.short 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048
|
||||
.short 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615
|
||||
@@ -280,7 +283,7 @@ kSinTab2:
|
||||
|
||||
.align 8
|
||||
@ Sine table 1 in Q14
|
||||
kSinTab1:
|
||||
WebRtcIsacfix_kSinTab1:
|
||||
.short 0, 214, 429, 643, 857, 1072, 1285, 1499
|
||||
.short 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196
|
||||
.short 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859
|
||||
@@ -334,8 +337,8 @@ DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon
|
||||
add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES / 2 - 8]
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 2) @ loop counter
|
||||
adr r10, kSinTab2
|
||||
add r9, r10, #(120*2 - 16) @ &kSinTab2[119 - 8]
|
||||
adr r10, WebRtcIsacfix_kSinTab2
|
||||
add r9, r10, #(120*2 - 16) @ &WebRtcIsacfix_kSinTab2[119 - 8]
|
||||
|
||||
|
||||
mov r5, #-32
|
||||
@@ -349,24 +352,24 @@ TransformAndFindMax:
|
||||
|
||||
subs r8, #16
|
||||
|
||||
vld1.16 {q0}, [r9:64] @ kCosTab2[]
|
||||
vld1.16 {q0}, [r9, :64] @ kCosTab2[]
|
||||
sub r9, #16
|
||||
vld1.16 {q2}, [r0]! @ inreQ7[]
|
||||
vneg.s16 q0, q0
|
||||
vld1.16 {q3}, [r1]! @ inimQ7[]
|
||||
vrev64.16 d0, d0
|
||||
vrev64.16 d1, d1
|
||||
vld1.16 {q1}, [r10:64]! @ kSinTab2[]
|
||||
vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab2[]
|
||||
vswp d0, d1
|
||||
|
||||
vmull.s16 q8, d2, d6 @ kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q9, d3, d7 @ kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q8, d2, d6 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q9, d3, d7 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
|
||||
vmlal.s16 q8, d0, d4 @ kCosTab2[k] * inreQ7[k]
|
||||
vmlal.s16 q9, d1, d5 @ kCosTab2[k] * inreQ7[k]
|
||||
vmull.s16 q12, d0, d6 @ kCosTab2[k] * inimQ7[k]
|
||||
vmull.s16 q13, d1, d7 @ kCosTab2[k] * inimQ7[k]
|
||||
vmlsl.s16 q12, d2, d4 @ kSinTab2[k] * inreQ7[k]
|
||||
vmlsl.s16 q13, d3, d5 @ kSinTab2[k] * inreQ7[k]
|
||||
vmlsl.s16 q12, d2, d4 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k]
|
||||
vmlsl.s16 q13, d3, d5 @ WebRtcIsacfix_kSinTab2[k] * inreQ7[k]
|
||||
|
||||
vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES / 2 - 8 + i]
|
||||
vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES / 2 - 8 + i]
|
||||
@@ -374,15 +377,15 @@ TransformAndFindMax:
|
||||
vrev64.16 q2, q2 @ Reverse the order of the samples
|
||||
vrev64.16 q3, q3 @ Reverse the order of the samples
|
||||
|
||||
vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[k]
|
||||
vmlsl.s16 q14, d0, d7 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k]
|
||||
vmlsl.s16 q15, d1, d6 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k]
|
||||
vmull.s16 q14, d2, d5 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q15, d3, d4 @ WebRtcIsacfix_kSinTab2[k] * inimQ7[k]
|
||||
vmlsl.s16 q14, d0, d7 @ q14 -= kCosTab2[k] * inreQ7[k]
|
||||
vmlsl.s16 q15, d1, d6 @ q15 -= kCosTab2[k] * inreQ7[k]
|
||||
|
||||
vmull.s16 q10, d0, d5 @ kCosTab2[k] * inimQ7[]
|
||||
vmull.s16 q11, d1, d4 @ kCosTab2[k] * inimQ7[]
|
||||
vmlal.s16 q10, d2, d7 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
|
||||
vmlal.s16 q11, d3, d6 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
|
||||
vmlal.s16 q10, d2, d7 @ q10 += WebRtcIsacfix_kSinTab2[k] * inreQ7[]
|
||||
vmlal.s16 q11, d3, d6 @ q11 += WebRtcIsacfix_kSinTab2[k] * inreQ7[]
|
||||
|
||||
vshr.s32 q8, q8, #5 @ xrQ16
|
||||
vshr.s32 q9, q9, #5 @ xrQ16
|
||||
@@ -448,9 +451,9 @@ TransformAndFindMax:
|
||||
|
||||
bgt TransformAndFindMax
|
||||
|
||||
adr r10, kSinTab1
|
||||
mov r2, #(kSinTab1 - kCosTab1)
|
||||
sub r9, r10, r2 @ kCosTab1
|
||||
adr r10, WebRtcIsacfix_kSinTab1
|
||||
mov r2, #(WebRtcIsacfix_kSinTab1 - WebRtcIsacfix_kCosTab1)
|
||||
sub r9, r10, r2 @ WebRtcIsacfix_kCosTab1
|
||||
|
||||
@ Find the maximum value in the Neon registers
|
||||
vmax.u32 d12, d13
|
||||
@@ -565,39 +568,39 @@ PostFftShiftDivide:
|
||||
DemodulateAndSeparate:
|
||||
subs r8, #8
|
||||
|
||||
vld1.16 {q0}, [r9:64]! @ kCosTab1[]
|
||||
vmovl.s16 q6, d0 @ kCosTab1[]
|
||||
vld1.16 {q1}, [r10:64]! @ kSinTab1[]
|
||||
vmovl.s16 q7, d1 @ kCosTab1[]
|
||||
vld1.16 {q0}, [r9, :64]! @ WebRtcIsacfix_kCosTab1[]
|
||||
vmovl.s16 q6, d0 @ WebRtcIsacfix_kCosTab1[]
|
||||
vld1.16 {q1}, [r10, :64]! @ WebRtcIsacfix_kSinTab1[]
|
||||
vmovl.s16 q7, d1 @ WebRtcIsacfix_kCosTab1[]
|
||||
vld1.32 {q2, q3}, [r2] @ outre1Q16
|
||||
vmovl.s16 q8, d2 @ kSinTab1[]
|
||||
vmovl.s16 q8, d2 @ WebRtcIsacfix_kSinTab1[]
|
||||
vld1.32 {q4, q5}, [r3] @ outre2Q16
|
||||
vmovl.s16 q9, d3 @ kSinTab1[]
|
||||
vmovl.s16 q9, d3 @ WebRtcIsacfix_kSinTab1[]
|
||||
|
||||
vmull.s32 q10, d12, d4 @ kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q11, d13, d5 @ kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q12, d14, d6 @ kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q13, d15, d7 @ kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q10, d12, d4 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q11, d13, d5 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q12, d14, d6 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q13, d15, d7 @ WebRtcIsacfix_kCosTab1[k] * outre1Q16[k]
|
||||
|
||||
vmlsl.s32 q10, d16, d8 @ += kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q11, d17, d9 @ += kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q12, d18, d10 @ += kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q13, d19, d11 @ += kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q10, d16, d8 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q11, d17, d9 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q12, d18, d10 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q13, d19, d11 @ += WebRtcIsacfix_kSinTab1[k] * outre2Q16[k]
|
||||
|
||||
vrshrn.s64 d20, q10, #10 @ xrQ16
|
||||
vrshrn.s64 d21, q11, #10 @ xrQ16
|
||||
vrshrn.s64 d22, q12, #10 @ xrQ16
|
||||
vrshrn.s64 d23, q13, #10 @ xrQ16
|
||||
|
||||
vmull.s32 q12, d12, d8 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q13, d13, d9 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q14, d14, d10 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q15, d15, d11 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q12, d12, d8 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q13, d13, d9 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q14, d14, d10 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q15, d15, d11 @ WebRtcIsacfix_kCosTab1[k] * outre2Q16[k]
|
||||
|
||||
vmlal.s32 q12, d16, d4 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q13, d17, d5 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q14, d18, d6 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q15, d19, d7 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q12, d16, d4 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q13, d17, d5 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q14, d18, d6 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q15, d19, d7 @ += WebRtcIsacfix_kSinTab1[k] * outre1Q16[k]
|
||||
|
||||
vdup.s32 q4, r0 @ generic -> Neon doesn't cost extra cycles.
|
||||
|
||||
@@ -622,4 +625,3 @@ DemodulateAndSeparate:
|
||||
add sp, sp, #16
|
||||
vpop {q4-q7}
|
||||
pop {r4-r11,pc}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user