Optimized WebRtcIsacfix_Time2Spec() for iSAC-Fix on ARM NEON processors.
Review URL: https://webrtc-codereview.appspot.com/1005004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3404 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
5dfb1f2cd3
commit
4782911572
@ -25,68 +25,76 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int WebRtcIsacfix_EstimateBandwidth(BwEstimatorstr *bwest_str,
|
||||
Bitstr_dec *streamdata,
|
||||
WebRtc_Word32 packet_size,
|
||||
WebRtc_UWord16 rtp_seq_number,
|
||||
WebRtc_UWord32 send_ts,
|
||||
WebRtc_UWord32 arr_ts);
|
||||
int WebRtcIsacfix_EstimateBandwidth(BwEstimatorstr* bwest_str,
|
||||
Bitstr_dec* streamdata,
|
||||
WebRtc_Word32 packet_size,
|
||||
WebRtc_UWord16 rtp_seq_number,
|
||||
WebRtc_UWord32 send_ts,
|
||||
WebRtc_UWord32 arr_ts);
|
||||
|
||||
WebRtc_Word16 WebRtcIsacfix_DecodeImpl(WebRtc_Word16 *signal_out16,
|
||||
ISACFIX_DecInst_t *ISACdec_obj,
|
||||
WebRtc_Word16 *current_framesamples);
|
||||
WebRtc_Word16 WebRtcIsacfix_DecodeImpl(WebRtc_Word16* signal_out16,
|
||||
ISACFIX_DecInst_t* ISACdec_obj,
|
||||
WebRtc_Word16* current_framesamples);
|
||||
|
||||
WebRtc_Word16 WebRtcIsacfix_DecodePlcImpl(WebRtc_Word16 *decoded,
|
||||
ISACFIX_DecInst_t *ISACdec_obj,
|
||||
WebRtc_Word16 *current_framesample );
|
||||
WebRtc_Word16 WebRtcIsacfix_DecodePlcImpl(WebRtc_Word16* decoded,
|
||||
ISACFIX_DecInst_t* ISACdec_obj,
|
||||
WebRtc_Word16* current_framesample );
|
||||
|
||||
int WebRtcIsacfix_EncodeImpl(WebRtc_Word16 *in,
|
||||
ISACFIX_EncInst_t *ISACenc_obj,
|
||||
BwEstimatorstr *bw_estimatordata,
|
||||
WebRtc_Word16 CodingMode);
|
||||
int WebRtcIsacfix_EncodeImpl(WebRtc_Word16* in,
|
||||
ISACFIX_EncInst_t* ISACenc_obj,
|
||||
BwEstimatorstr* bw_estimatordata,
|
||||
WebRtc_Word16 CodingMode);
|
||||
|
||||
int WebRtcIsacfix_EncodeStoredData(ISACFIX_EncInst_t *ISACenc_obj,
|
||||
int BWnumber,
|
||||
float scale);
|
||||
int WebRtcIsacfix_EncodeStoredData(ISACFIX_EncInst_t* ISACenc_obj,
|
||||
int BWnumber,
|
||||
float scale);
|
||||
|
||||
/* initialization functions */
|
||||
|
||||
void WebRtcIsacfix_InitMaskingEnc(MaskFiltstr_enc *maskdata);
|
||||
void WebRtcIsacfix_InitMaskingDec(MaskFiltstr_dec *maskdata);
|
||||
void WebRtcIsacfix_InitMaskingEnc(MaskFiltstr_enc* maskdata);
|
||||
void WebRtcIsacfix_InitMaskingDec(MaskFiltstr_dec* maskdata);
|
||||
|
||||
void WebRtcIsacfix_InitPreFilterbank(PreFiltBankstr *prefiltdata);
|
||||
void WebRtcIsacfix_InitPreFilterbank(PreFiltBankstr* prefiltdata);
|
||||
|
||||
void WebRtcIsacfix_InitPostFilterbank(PostFiltBankstr *postfiltdata);
|
||||
void WebRtcIsacfix_InitPostFilterbank(PostFiltBankstr* postfiltdata);
|
||||
|
||||
void WebRtcIsacfix_InitPitchFilter(PitchFiltstr *pitchfiltdata);
|
||||
void WebRtcIsacfix_InitPitchFilter(PitchFiltstr* pitchfiltdata);
|
||||
|
||||
void WebRtcIsacfix_InitPitchAnalysis(PitchAnalysisStruct *State);
|
||||
void WebRtcIsacfix_InitPitchAnalysis(PitchAnalysisStruct* State);
|
||||
|
||||
void WebRtcIsacfix_InitPlc( PLCstr *State );
|
||||
void WebRtcIsacfix_InitPlc(PLCstr* State);
|
||||
|
||||
|
||||
/* transform functions */
|
||||
|
||||
void WebRtcIsacfix_InitTransform();
|
||||
|
||||
|
||||
void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9,
|
||||
WebRtc_Word16 *inre2Q9,
|
||||
WebRtc_Word16 *outre,
|
||||
WebRtc_Word16 *outim);
|
||||
|
||||
typedef void (*Time2Spec)(WebRtc_Word16* inre1Q9,
|
||||
WebRtc_Word16* inre2Q9,
|
||||
WebRtc_Word16* outre,
|
||||
WebRtc_Word16* outim);
|
||||
typedef void (*Spec2Time)(WebRtc_Word16* inreQ7,
|
||||
WebRtc_Word16* inimQ7,
|
||||
WebRtc_Word32* outre1Q16,
|
||||
WebRtc_Word32* outre2Q16);
|
||||
|
||||
extern Time2Spec WebRtcIsacfix_Time2Spec;
|
||||
extern Spec2Time WebRtcIsacfix_Spec2Time;
|
||||
|
||||
void WebRtcIsacfix_Time2SpecC(WebRtc_Word16* inre1Q9,
|
||||
WebRtc_Word16* inre2Q9,
|
||||
WebRtc_Word16* outre,
|
||||
WebRtc_Word16* outim);
|
||||
void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16* inreQ7,
|
||||
WebRtc_Word16* inimQ7,
|
||||
WebRtc_Word32* outre1Q16,
|
||||
WebRtc_Word32* outre2Q16);
|
||||
|
||||
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
||||
void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9,
|
||||
WebRtc_Word16* inre2Q9,
|
||||
WebRtc_Word16* outre,
|
||||
WebRtc_Word16* outim);
|
||||
void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16* inreQ7,
|
||||
WebRtc_Word16* inimQ7,
|
||||
WebRtc_Word32* outre1Q16,
|
||||
@ -94,52 +102,50 @@ void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16* inreQ7,
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/* filterbank functions */
|
||||
|
||||
void WebRtcIsacfix_SplitAndFilter1(WebRtc_Word16 *in,
|
||||
WebRtc_Word16 *LP16,
|
||||
WebRtc_Word16 *HP16,
|
||||
PreFiltBankstr *prefiltdata);
|
||||
void WebRtcIsacfix_SplitAndFilter1(WebRtc_Word16* in,
|
||||
WebRtc_Word16* LP16,
|
||||
WebRtc_Word16* HP16,
|
||||
PreFiltBankstr* prefiltdata);
|
||||
|
||||
void WebRtcIsacfix_FilterAndCombine1(WebRtc_Word16 *tempin_ch1,
|
||||
WebRtc_Word16 *tempin_ch2,
|
||||
WebRtc_Word16 *out16,
|
||||
PostFiltBankstr *postfiltdata);
|
||||
void WebRtcIsacfix_FilterAndCombine1(WebRtc_Word16* tempin_ch1,
|
||||
WebRtc_Word16* tempin_ch2,
|
||||
WebRtc_Word16* out16,
|
||||
PostFiltBankstr* postfiltdata);
|
||||
|
||||
#ifdef WEBRTC_ISAC_FIX_NB_CALLS_ENABLED
|
||||
|
||||
void WebRtcIsacfix_SplitAndFilter2(WebRtc_Word16 *in,
|
||||
WebRtc_Word16 *LP16,
|
||||
WebRtc_Word16 *HP16,
|
||||
PreFiltBankstr *prefiltdata);
|
||||
void WebRtcIsacfix_SplitAndFilter2(WebRtc_Word16* in,
|
||||
WebRtc_Word16* LP16,
|
||||
WebRtc_Word16* HP16,
|
||||
PreFiltBankstr* prefiltdata);
|
||||
|
||||
void WebRtcIsacfix_FilterAndCombine2(WebRtc_Word16 *tempin_ch1,
|
||||
WebRtc_Word16 *tempin_ch2,
|
||||
WebRtc_Word16 *out16,
|
||||
PostFiltBankstr *postfiltdata,
|
||||
WebRtc_Word16 len);
|
||||
void WebRtcIsacfix_FilterAndCombine2(WebRtc_Word16* tempin_ch1,
|
||||
WebRtc_Word16* tempin_ch2,
|
||||
WebRtc_Word16* out16,
|
||||
PostFiltBankstr* postfiltdata,
|
||||
WebRtc_Word16 len);
|
||||
|
||||
#endif
|
||||
|
||||
/* normalized lattice filters */
|
||||
|
||||
void WebRtcIsacfix_NormLatticeFilterMa(WebRtc_Word16 orderCoef,
|
||||
WebRtc_Word32 *stateGQ15,
|
||||
WebRtc_Word16 *lat_inQ0,
|
||||
WebRtc_Word16 *filt_coefQ15,
|
||||
WebRtc_Word32 *gain_lo_hiQ17,
|
||||
WebRtc_Word32* stateGQ15,
|
||||
WebRtc_Word16* lat_inQ0,
|
||||
WebRtc_Word16* filt_coefQ15,
|
||||
WebRtc_Word32* gain_lo_hiQ17,
|
||||
WebRtc_Word16 lo_hi,
|
||||
WebRtc_Word16 *lat_outQ9);
|
||||
WebRtc_Word16* lat_outQ9);
|
||||
|
||||
void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
||||
WebRtc_Word16 *stateGQ0,
|
||||
WebRtc_Word32 *lat_inQ25,
|
||||
WebRtc_Word16 *filt_coefQ15,
|
||||
WebRtc_Word32 *gain_lo_hiQ17,
|
||||
WebRtc_Word16* stateGQ0,
|
||||
WebRtc_Word32* lat_inQ25,
|
||||
WebRtc_Word16* filt_coefQ15,
|
||||
WebRtc_Word32* gain_lo_hiQ17,
|
||||
WebRtc_Word16 lo_hi,
|
||||
WebRtc_Word16 *lat_outQ0);
|
||||
WebRtc_Word16* lat_outQ0);
|
||||
|
||||
/* TODO(kma): Move the following functions into individual header files. */
|
||||
|
||||
|
@ -183,6 +183,7 @@ static void WebRtcIsacfix_InitNeon(void) {
|
||||
WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrNeon;
|
||||
WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopNeon;
|
||||
WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeNeon;
|
||||
WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecNeon;
|
||||
WebRtcIsacfix_CalculateResidualEnergy =
|
||||
WebRtcIsacfix_CalculateResidualEnergyNeon;
|
||||
WebRtcIsacfix_AllpassFilter2FixDec16 =
|
||||
@ -273,10 +274,9 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
|
||||
WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopC;
|
||||
WebRtcIsacfix_CalculateResidualEnergy =
|
||||
WebRtcIsacfix_CalculateResidualEnergyC;
|
||||
WebRtcIsacfix_AllpassFilter2FixDec16 =
|
||||
WebRtcIsacfix_AllpassFilter2FixDec16C;
|
||||
WebRtcIsacfix_Spec2Time =
|
||||
WebRtcIsacfix_Spec2TimeC;
|
||||
WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C;
|
||||
WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecC;
|
||||
WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeC;
|
||||
|
||||
#ifdef WEBRTC_DETECT_ARM_NEON
|
||||
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
|
||||
|
@ -79,23 +79,6 @@ const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = {
|
||||
};
|
||||
|
||||
|
||||
/* Cosine table 2 in Q14 */
|
||||
const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = {
|
||||
107, -322, 536, -750, 965, -1179, 1392, -1606, 1819, -2032,
|
||||
2245, -2457, 2669, -2880, 3091, -3301, 3511, -3720, 3929, -4137,
|
||||
4344, -4550, 4756, -4961, 5165, -5368, 5570, -5771, 5971, -6171,
|
||||
6369, -6566, 6762, -6957, 7150, -7342, 7534, -7723, 7912, -8099,
|
||||
8285, -8469, 8652, -8833, 9013, -9191, 9368, -9543, 9717, -9889,
|
||||
10059, -10227, 10394, -10559, 10722, -10883, 11042, -11200, 11356, -11509,
|
||||
11661, -11810, 11958, -12104, 12247, -12389, 12528, -12665, 12800, -12933,
|
||||
13063, -13192, 13318, -13441, 13563, -13682, 13799, -13913, 14025, -14135,
|
||||
14242, -14347, 14449, -14549, 14647, -14741, 14834, -14924, 15011, -15095,
|
||||
15178, -15257, 15334, -15408, 15480, -15549, 15615, -15679, 15739, -15798,
|
||||
15853, -15906, 15956, -16003, 16048, -16090, 16129, -16165, 16199, -16229,
|
||||
16257, -16283, 16305, -16325, 16342, -16356, 16367, -16375, 16381, -16384
|
||||
};
|
||||
|
||||
|
||||
/* Sine table 2 in Q14 */
|
||||
const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
|
||||
16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
|
||||
@ -112,10 +95,11 @@ const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = {
|
||||
2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
|
||||
};
|
||||
|
||||
// Declare a function pointer.
|
||||
// Declare function pointers.
|
||||
Spec2Time WebRtcIsacfix_Spec2Time;
|
||||
Time2Spec WebRtcIsacfix_Time2Spec;
|
||||
|
||||
void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9,
|
||||
void WebRtcIsacfix_Time2SpecC(WebRtc_Word16 *inre1Q9,
|
||||
WebRtc_Word16 *inre2Q9,
|
||||
WebRtc_Word16 *outreQ7,
|
||||
WebRtc_Word16 *outimQ7)
|
||||
@ -187,7 +171,7 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9,
|
||||
yiQ16 = -tmpreQ16[k] + tmpreQ16[FRAMESAMPLES/2 - 1 - k];
|
||||
xiQ16 = tmpimQ16[k] - tmpimQ16[FRAMESAMPLES/2 - 1 - k];
|
||||
yrQ16 = tmpimQ16[k] + tmpimQ16[FRAMESAMPLES/2 - 1 - k];
|
||||
tmp1rQ14 = kCosTab2[k];
|
||||
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k];
|
||||
tmp1iQ14 = kSinTab2[k];
|
||||
v1Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xrQ16) - WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xiQ16);
|
||||
v2Q16 = WEBRTC_SPL_MUL_16_32_RSFT14(tmp1iQ14, xrQ16) + WEBRTC_SPL_MUL_16_32_RSFT14(tmp1rQ14, xiQ16);
|
||||
@ -214,7 +198,7 @@ void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebR
|
||||
|
||||
for (k = 0; k < FRAMESAMPLES/4; k++) {
|
||||
/* Move zero in time to beginning of frames */
|
||||
tmp1rQ14 = kCosTab2[k];
|
||||
tmp1rQ14 = -kSinTab2[FRAMESAMPLES/4 - 1 - k];
|
||||
tmp1iQ14 = kSinTab2[k];
|
||||
|
||||
tmpInRe = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) inreQ7[k], 9); // Q7 -> Q16
|
||||
|
@ -15,6 +15,302 @@
|
||||
#include "webrtc/system_wrappers/interface/asm_defines.h"
|
||||
|
||||
GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon
|
||||
GLOBAL_FUNCTION WebRtcIsacfix_Time2SpecNeon
|
||||
|
||||
@ void WebRtcIsacfix_Time2SpecNeon(WebRtc_Word16* inre1Q9,
|
||||
@ WebRtc_Word16* inre2Q9,
|
||||
@ WebRtc_Word16* outreQ7,
|
||||
@ WebRtc_Word16* outimQ7);
|
||||
|
||||
DEFINE_FUNCTION WebRtcIsacfix_Time2SpecNeon
|
||||
.align 2
|
||||
push {r4-r11,lr}
|
||||
vpush {q4-q7}
|
||||
sub sp, sp, #(16 + FRAMESAMPLES * 4)
|
||||
|
||||
str r0, [sp] @ inre1Q9
|
||||
str r1, [sp, #4] @ inre2Q9
|
||||
str r2, [sp, #8] @ outreQ7
|
||||
str r3, [sp, #12] @ outimQ7
|
||||
|
||||
mov r8, #(FRAMESAMPLES - 16)
|
||||
add r12, r0, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 8]
|
||||
add r11, r1, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 8]
|
||||
add r4, sp, #16 @ tmpreQ16;
|
||||
add r5, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16;
|
||||
|
||||
adr r9, kCosTab1
|
||||
mov r6, #(kSinTab1 - kCosTab1)
|
||||
add r10, r9, r6 @ kSinTab1
|
||||
|
||||
vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm.
|
||||
vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe.
|
||||
movw r6, #16921 @ 0.5 / sqrt(240) in Q19
|
||||
lsl r6, #5 @ Together with vqdmulh, net effect is ">> 26".
|
||||
mov r8, #(FRAMESAMPLES / 2) @ loop counter
|
||||
vdup.s32 q4, r6
|
||||
|
||||
Time2Spec_TransformAndFindMax:
|
||||
@ Use ">> 26", instead of ">> 7", ">> 16" and then ">> 3" as in the C code.
|
||||
|
||||
subs r8, #8
|
||||
|
||||
vld1.16 {q0}, [r9:64]! @ kCosTab1[]
|
||||
vld1.16 {q2}, [r0]! @ inre1Q9[]
|
||||
vmull.s16 q8, d0, d4 @ kCosTab1[k] * inre1Q9[k]
|
||||
vld1.16 {q1}, [r10:64]! @ kSinTab1[]
|
||||
vmull.s16 q9, d1, d5 @ kCosTab1[k] * inre1Q9[k]
|
||||
vld1.16 {q3}, [r1]! @ inre2Q9[]
|
||||
vmlal.s16 q8, d2, d6 @ kSinTab1[k] * inre2Q9[k]
|
||||
vmlal.s16 q9, d3, d7 @ kSinTab1[k] * inre2Q9[k]
|
||||
vmull.s16 q12, d0, d6 @ kCosTab1[k] * inre2Q9[k]
|
||||
vmull.s16 q13, d1, d7 @ kCosTab1[k] * inre2Q9[k]
|
||||
vmlsl.s16 q12, d2, d4 @ kSinTab1[k] * inre1Q9[k]
|
||||
vmlsl.s16 q13, d3, d5 @ kSinTab1[k] * inre1Q9[k]
|
||||
|
||||
vqdmulh.s32 q0, q8, q4 @ xrQ16 * factQ19
|
||||
vqdmulh.s32 q1, q9, q4 @ xrQ16 * factQ19
|
||||
vqdmulh.s32 q2, q12, q4 @ xrQ16 * factQ19
|
||||
vqdmulh.s32 q3, q13, q4 @ xrQ16 * factQ19
|
||||
|
||||
@ Find the absolute maximum in the vectors and store them in q6 and q7.
|
||||
vabs.s32 q10, q0
|
||||
vabs.s32 q11, q1
|
||||
vabs.s32 q12, q2
|
||||
vst1.32 {q0, q1}, [r4]! @ tmpreQ16[k]
|
||||
vabs.s32 q13, q3
|
||||
vmax.u32 q6, q10 @ Use u32 so we don't lose the value 0x80000000.
|
||||
vmax.u32 q7, q12
|
||||
vst1.32 {q2, q3}, [r5]! @ tmpimQ16[k]
|
||||
vmax.u32 q7, q13
|
||||
vmax.u32 q6, q11 @ Maximum for tmpreQ16[].
|
||||
|
||||
bgt Time2Spec_TransformAndFindMax
|
||||
|
||||
@ Find the maximum value in the Neon registers
|
||||
vmax.u32 d12, d13
|
||||
vmax.u32 d14, d15
|
||||
vpmax.u32 d12, d12, d12 @ Both 32 bits words hold the same value tmpInIm.
|
||||
vpmax.u32 d14, d14, d14 @ Both 32 bits words hold the same value tmpInRe.
|
||||
vmax.s32 d14, d12, d14 @ if (yrQ16 > xrQ16) {xrQ16 = yrQ16};
|
||||
|
||||
ldr r4, [sp] @ inre1Q9
|
||||
vcls.s32 d15, d14 @ sh = WebRtcSpl_NormW32(tmpInRe);
|
||||
ldr r5, [sp, #4] @ inre2Q9
|
||||
vmov.i32 d14, #24
|
||||
add r6, sp, #16 @ tmpreQ16;
|
||||
vsub.s32 d15, d15, d14 @ sh = sh - 24;
|
||||
add r7, sp, #(16 + FRAMESAMPLES * 2) @ tmpimQ16;
|
||||
vdup.s32 q8, d15[0] @ sh
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 2) @ loop counter
|
||||
|
||||
Time2Spec_PreFftShift:
|
||||
subs r8, #16
|
||||
|
||||
vld1.32 {q0, q1}, [r6]! @ tmpreQ16[]
|
||||
vrshl.s32 q0, q0, q8
|
||||
vld1.32 {q2, q3}, [r6]! @ tmpreQ16[]
|
||||
vrshl.s32 q1, q1, q8
|
||||
vld1.32 {q4, q5}, [r7]! @ tmpimQ16[]
|
||||
vrshl.s32 q2, q2, q8
|
||||
vld1.32 {q6, q7}, [r7]! @ tmpimQ16[]
|
||||
vrshl.s32 q3, q3, q8
|
||||
vrshl.s32 q4, q4, q8
|
||||
vrshl.s32 q5, q5, q8
|
||||
vrshl.s32 q6, q6, q8
|
||||
vrshl.s32 q7, q7, q8
|
||||
|
||||
vmovn.s32 d0, q0
|
||||
vmovn.s32 d1, q1
|
||||
vmovn.s32 d2, q2
|
||||
vmovn.s32 d3, q3
|
||||
vmovn.s32 d4, q4
|
||||
vmovn.s32 d5, q5
|
||||
vmovn.s32 d6, q6
|
||||
vmovn.s32 d7, q7
|
||||
|
||||
vst1.16 {q0, q1}, [r4]! @ inre1Q9[]
|
||||
vst1.16 {q2, q3}, [r5]! @ inre2Q9[]
|
||||
|
||||
bgt Time2Spec_PreFftShift
|
||||
|
||||
ldr r0, [sp] @ inre1Q9
|
||||
ldr r1, [sp, #4] @ inre2Q9
|
||||
mov r2, #-1
|
||||
CALL_FUNCTION WebRtcIsacfix_FftRadix16Fastest
|
||||
|
||||
vneg.s32 q15, q8 @ -sh
|
||||
vmov.i32 q0, #23
|
||||
vsub.s32 q15, q15, q0 @ -sh - 23
|
||||
|
||||
mov r8, #(FRAMESAMPLES - 8)
|
||||
ldr r2, [sp, #8] @ outreQ7
|
||||
ldr r3, [sp, #12] @ outimQ7
|
||||
add r11, r2, r8 @ &outreQ7[FRAMESAMPLES / 2 - 4]
|
||||
add r12, r3, r8 @ &outimQ7[FRAMESAMPLES / 2 - 4]
|
||||
ldr r6, [sp] @ inre1Q9
|
||||
ldr r7, [sp, #4] @ inre2Q9
|
||||
add r4, r6, r8 @ &inre1Q9[FRAMESAMPLES / 2 - 4]
|
||||
add r5, r7, r8 @ &inre2Q9[FRAMESAMPLES / 2 - 4]
|
||||
adr r10, kSinTab2
|
||||
|
||||
add r9, r10, #(120*2 - 8) @ &kSinTab2[120 - 4]
|
||||
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 4) @ loop counter
|
||||
|
||||
@ Pre-load variables.
|
||||
vld1.16 {d2}, [r4] @ inre1Q9[FRAMESAMPLES / 2 - 4 - i]
|
||||
vld1.16 {d3}, [r5] @ inre2Q9[FRAMESAMPLES / 2 - 4 - i]
|
||||
vld1.16 {d0}, [r6]! @ inre1Q9
|
||||
vld1.16 {d1}, [r7]! @ inre2Q9
|
||||
|
||||
Time2Spec_PostFftTransform:
|
||||
@ By vshl, we effectively did "<< (-sh - 23)", instead of "<< (-sh)",
|
||||
@ ">> 14" and then ">> 9" as in the C code.
|
||||
|
||||
vld1.16 {d6}, [r9:64] @ kCosTab2[]
|
||||
vneg.s16 d6, d6
|
||||
vld1.16 {d7}, [r10:64]! @ kSinTab2[]
|
||||
vrev64.16 q1, q1 @ Reverse samples in 2nd half of xrQ16[].
|
||||
vqadd.s16 d4, d0, d2 @ xrQ16
|
||||
vqsub.s16 d5, d1, d3 @ xiQ16
|
||||
vrev64.16 d6, d6
|
||||
|
||||
sub r9, #8 @ Update pointers for kCosTab2[].
|
||||
sub r4, #8 @ Update pointers for inre1Q9[].
|
||||
sub r5, #8 @ Update pointers for inre2Q9[].
|
||||
subs r8, #4 @ Update loop counter.
|
||||
|
||||
vqadd.s16 d1, d1, d3 @ yrQ16
|
||||
vqsub.s16 d0, d2, d0 @ yiQ16
|
||||
|
||||
vmull.s16 q12, d6, d4 @ kCosTab2[k] * xrQ16
|
||||
vmlsl.s16 q12, d7, d5 @ kSinTab2[k] * xiQ16
|
||||
vmull.s16 q13, d7, d4 @ kSinTab2[k] * xrQ16
|
||||
vmlal.s16 q13, d6, d5 @ kCosTab2[k] * xiQ16
|
||||
vmull.s16 q6, d7, d1 @ kSinTab2[k] * yrQ16
|
||||
vmlal.s16 q6, d6, d0 @ kCosTab2[k] * yiQ16
|
||||
vmull.s16 q7, d7, d0 @ kSinTab2[k] * yiQ16
|
||||
vmlsl.s16 q7, d6, d1 @ kCosTab2[k] * yrQ16
|
||||
|
||||
vshl.s32 q12, q12, q15
|
||||
vshl.s32 q13, q13, q15
|
||||
vshl.s32 q6, q6, q15
|
||||
vshl.s32 q7, q7, q15
|
||||
|
||||
vneg.s32 q8, q6
|
||||
vld1.16 {d0}, [r6]! @ inre1Q9
|
||||
vmovn.s32 d8, q12
|
||||
vld1.16 {d1}, [r7]! @ inre2Q9
|
||||
vmovn.s32 d9, q13
|
||||
vld1.16 {d2}, [r4] @ inre1Q9[FRAMESAMPLES / 2 - 4 - i]
|
||||
vmovn.s32 d5, q7
|
||||
vld1.16 {d3}, [r5] @ inre2Q9[FRAMESAMPLES / 2 - 4 - i]
|
||||
vmovn.s32 d4, q8
|
||||
vst1.16 {d8}, [r2]! @ outreQ7[k]
|
||||
vrev64.16 q2, q2 @ Reverse the order of the samples.
|
||||
vst1.16 {d9}, [r3]! @ outimQ7[k]
|
||||
vst1.16 {d4}, [r11] @ outreQ7[FRAMESAMPLES / 2 - 1 - k]
|
||||
vst1.16 {d5}, [r12] @ outimQ7[FRAMESAMPLES / 2 - 1 - k]
|
||||
sub r11, #8 @ Update pointers for outreQ7[].
|
||||
sub r12, #8 @ Update pointers for outimQ7[].
|
||||
|
||||
bgt Time2Spec_PostFftTransform
|
||||
|
||||
add sp, sp, #(16 + FRAMESAMPLES * 4)
|
||||
vpop {q4-q7}
|
||||
pop {r4-r11,pc}
|
||||
|
||||
.align 8
|
||||
@ Cosine table 1 in Q14
|
||||
kCosTab1:
|
||||
.short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315
|
||||
.short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069
|
||||
.short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647
|
||||
.short 15582, 15515, 15444, 15371, 15296, 15218, 15137, 15053
|
||||
.short 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295
|
||||
.short 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380
|
||||
.short 13255, 13128, 12998, 12867, 12733, 12597, 12458, 12318
|
||||
.short 12176, 12031, 11885, 11736, 11585, 11433, 11278, 11121
|
||||
.short 10963, 10803, 10641, 10477, 10311, 10143, 9974, 9803
|
||||
.short 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377
|
||||
.short 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859
|
||||
.short 6664, 6467, 6270, 6071, 5872, 5671, 5469, 5266
|
||||
.short 5063, 4859, 4653, 4447, 4240, 4033, 3825, 3616
|
||||
.short 3406, 3196, 2986, 2775, 2563, 2351, 2139, 1926
|
||||
.short 1713, 1499, 1285, 1072, 857, 643, 429, 214
|
||||
.short 0, -214, -429, -643, -857, -1072, -1285, -1499
|
||||
.short -1713, -1926, -2139, -2351, -2563, -2775, -2986, -3196
|
||||
.short -3406, -3616, -3825, -4033, -4240, -4447, -4653, -4859
|
||||
.short -5063, -5266, -5469, -5671, -5872, -6071, -6270, -6467
|
||||
.short -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006
|
||||
.short -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456
|
||||
.short -9630, -9803, -9974, -10143, -10311, -10477, -10641, -10803
|
||||
.short -10963, -11121, -11278, -11433, -11585, -11736, -11885, -12031
|
||||
.short -12176, -12318, -12458, -12597, -12733, -12867, -12998, -13128
|
||||
.short -13255, -13380, -13502, -13623, -13741, -13856, -13970, -14081
|
||||
.short -14189, -14295, -14399, -14500, -14598, -14694, -14788, -14879
|
||||
.short -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515
|
||||
.short -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980
|
||||
.short -16026, -16069, -16110, -16147, -16182, -16214, -16244, -16270
|
||||
.short -16294, -16315, -16333, -16349, -16362, -16371, -16378, -16383
|
||||
|
||||
.align 8
|
||||
@ Sine table 2 in Q14
|
||||
kSinTab2:
|
||||
.short 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305
|
||||
.short 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048
|
||||
.short 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615
|
||||
.short 15549, -15480, 15408, -15334, 15257, -15178, 15095, -15011
|
||||
.short 14924, -14834, 14741, -14647, 14549, -14449, 14347, -14242
|
||||
.short 14135, -14025, 13913, -13799, 13682, -13563, 13441, -13318
|
||||
.short 13192, -13063, 12933, -12800, 12665, -12528, 12389, -12247
|
||||
.short 12104, -11958, 11810, -11661, 11509, -11356, 11200, -11042
|
||||
.short 10883, -10722, 10559, -10394, 10227, -10059, 9889, -9717
|
||||
.short 9543, -9368, 9191, -9013, 8833, -8652, 8469, -8285
|
||||
.short 8099, -7912, 7723, -7534, 7342, -7150, 6957, -6762
|
||||
.short 6566, -6369, 6171, -5971, 5771, -5570, 5368, -5165
|
||||
.short 4961, -4756, 4550, -4344, 4137, -3929, 3720, -3511
|
||||
.short 3301, -3091, 2880, -2669, 2457, -2245, 2032, -1819
|
||||
.short 1606, -1392, 1179, -965, 750, -536, 322, -107
|
||||
|
||||
@ Table kCosTab2 was removed since its data is redundant with kSinTab2.
|
||||
|
||||
.align 8
|
||||
@ Sine table 1 in Q14
|
||||
kSinTab1:
|
||||
.short 0, 214, 429, 643, 857, 1072, 1285, 1499
|
||||
.short 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196
|
||||
.short 3406, 3616, 3825, 4033, 4240, 4447, 4653, 4859
|
||||
.short 5063, 5266, 5469, 5671, 5872, 6071, 6270, 6467
|
||||
.short 6664, 6859, 7053, 7246, 7438, 7629, 7818, 8006
|
||||
.short 8192, 8377, 8561, 8743, 8923, 9102, 9280, 9456
|
||||
.short 9630, 9803, 9974, 10143, 10311, 10477, 10641, 10803
|
||||
.short 10963, 11121, 11278, 11433, 11585, 11736, 11885, 12031
|
||||
.short 12176, 12318, 12458, 12597, 12733, 12867, 12998, 13128
|
||||
.short 13255, 13380, 13502, 13623, 13741, 13856, 13970, 14081
|
||||
.short 14189, 14295, 14399, 14500, 14598, 14694, 14788, 14879
|
||||
.short 14968, 15053, 15137, 15218, 15296, 15371, 15444, 15515
|
||||
.short 15582, 15647, 15709, 15769, 15826, 15880, 15931, 15980
|
||||
.short 16026, 16069, 16110, 16147, 16182, 16214, 16244, 16270
|
||||
.short 16294, 16315, 16333, 16349, 16362, 16371, 16378, 16383
|
||||
.short 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315
|
||||
.short 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069
|
||||
.short 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647
|
||||
.short 15582, 15515, 15444, 15371, 15296, 15218, 15137, 15053
|
||||
.short 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295
|
||||
.short 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380
|
||||
.short 13255, 13128, 12998, 12867, 12733, 12597, 12458, 12318
|
||||
.short 12176, 12031, 11885, 11736, 11585, 11433, 11278, 11121
|
||||
.short 10963, 10803, 10641, 10477, 10311, 10143, 9974, 9803
|
||||
.short 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377
|
||||
.short 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859
|
||||
.short 6664, 6467, 6270, 6071, 5872, 5671, 5469, 5266
|
||||
.short 5063, 4859, 4653, 4447, 4240, 4033, 3825, 3616
|
||||
.short 3406, 3196, 2986, 2775, 2563, 2351, 2139, 1926
|
||||
.short 1713, 1499, 1285, 1072, 857, 643, 429, 214
|
||||
|
||||
@ void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16 *inreQ7,
|
||||
@ WebRtc_Word16 *inimQ7,
|
||||
@ -32,70 +328,66 @@ DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon
|
||||
str r3, [sp, #12] @ outre2Q16
|
||||
|
||||
mov r8, #(FRAMESAMPLES - 16)
|
||||
add r12, r0, r8 @ &inreQ7[FRAMESAMPLES/2 - 8]
|
||||
add r11, r1, r8 @ &inimQ7[FRAMESAMPLES/2 - 8]
|
||||
add r4, r2, r8, lsl #1 @ &outRe1Q16[FRAMESAMPLES/2 - 8]
|
||||
add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES/2 - 8]
|
||||
add r12, r0, r8 @ &inreQ7[FRAMESAMPLES / 2 - 8]
|
||||
add r11, r1, r8 @ &inimQ7[FRAMESAMPLES / 2 - 8]
|
||||
add r4, r2, r8, lsl #1 @ &outRe1Q16[FRAMESAMPLES / 2 - 8]
|
||||
add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES / 2 - 8]
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 2) @ loop counter
|
||||
ldr r9, =kCosTab2
|
||||
ldr r10, =kSinTab2
|
||||
adr r10, kSinTab2
|
||||
add r9, r10, #(120*2 - 16) @ &kSinTab2[120 - 8]
|
||||
|
||||
|
||||
mov r5, #-32
|
||||
mov r7, #-16
|
||||
vmov.u32 q6, #0 @ Initialize the maximum values for tmpInIm.
|
||||
vmov.u32 q7, #0 @ Initialize the maximum values for tmpInRe.
|
||||
|
||||
TRANSFORM_AND_FIND_MAX:
|
||||
TransformAndFindMax:
|
||||
@ Use ">> 5", instead of "<< 9" and then ">> 14" as in the C code.
|
||||
@ Bit-exact.
|
||||
|
||||
vld1.16 {q0}, [r9]! @ kCosTab2[]
|
||||
vld1.16 {q1}, [r10]! @ kSinTab2[]
|
||||
vld1.16 {q2}, [r0]! @ inreQ7[]
|
||||
vld1.16 {q3}, [r1]! @ inimQ7[]
|
||||
|
||||
vmull.s16 q8, d0, d4 @ kCosTab2[k] x inreQ7[k]
|
||||
vmull.s16 q9, d1, d5 @ kCosTab2[k] x inreQ7[k]
|
||||
vmull.s16 q10, d2, d6 @ kSinTab2[k] x inimQ7[k]
|
||||
vmull.s16 q11, d3, d7 @ kSinTab2[k] x inimQ7[k]
|
||||
vmull.s16 q12, d0, d6 @ kCosTab2[k] x inimQ7[k]
|
||||
vmull.s16 q13, d1, d7 @ kCosTab2[k] x inimQ7[k]
|
||||
vmull.s16 q14, d2, d4 @ kSinTab2[k] x inreQ7[k]
|
||||
vmull.s16 q15, d3, d5 @ kSinTab2[k] x inreQ7[k]
|
||||
|
||||
vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES/2 - 9 - i]
|
||||
vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES/2 - 9 - i]
|
||||
|
||||
vadd.s32 q8, q8, q10
|
||||
vadd.s32 q9, q9, q11
|
||||
vsub.s32 q12, q12, q14
|
||||
vsub.s32 q13, q13, q15
|
||||
|
||||
subs r8, #16
|
||||
|
||||
vld1.16 {q0}, [r9:64] @ kCosTab2[]
|
||||
sub r9, #16
|
||||
vld1.16 {q2}, [r0]! @ inreQ7[]
|
||||
vneg.s16 q0, q0
|
||||
vld1.16 {q3}, [r1]! @ inimQ7[]
|
||||
vrev64.16 d0, d0
|
||||
vrev64.16 d1, d1
|
||||
vld1.16 {q1}, [r10:64]! @ kSinTab2[]
|
||||
vswp d0, d1
|
||||
|
||||
vmull.s16 q8, d2, d6 @ kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q9, d3, d7 @ kSinTab2[k] * inimQ7[k]
|
||||
vmlal.s16 q8, d0, d4 @ kCosTab2[k] * inreQ7[k]
|
||||
vmlal.s16 q9, d1, d5 @ kCosTab2[k] * inreQ7[k]
|
||||
vmull.s16 q12, d0, d6 @ kCosTab2[k] * inimQ7[k]
|
||||
vmull.s16 q13, d1, d7 @ kCosTab2[k] * inimQ7[k]
|
||||
vmlsl.s16 q12, d2, d4 @ kSinTab2[k] * inreQ7[k]
|
||||
vmlsl.s16 q13, d3, d5 @ kSinTab2[k] * inreQ7[k]
|
||||
|
||||
vld1.16 {q2}, [r11], r7 @ inimQ7[FRAMESAMPLES / 2 - 8 + i]
|
||||
vld1.16 {q3}, [r12], r7 @ inreQ7[FRAMESAMPLES / 2 - 8 + i]
|
||||
|
||||
vrev64.16 q2, q2 @ Reverse the order of the samples
|
||||
vrev64.16 q3, q3 @ Reverse the order of the samples
|
||||
|
||||
vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[k]
|
||||
vmlsl.s16 q14, d0, d7 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k]
|
||||
vmlsl.s16 q15, d1, d6 @ kSinTab2[k] * inimQ7[k] - kCosTab2[k] *inreQ7[k]
|
||||
|
||||
vmull.s16 q10, d0, d5 @ kCosTab2[k] * inimQ7[]
|
||||
vmull.s16 q11, d1, d4 @ kCosTab2[k] * inimQ7[]
|
||||
vmlal.s16 q10, d2, d7 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
|
||||
vmlal.s16 q11, d3, d6 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
|
||||
|
||||
vshr.s32 q8, q8, #5 @ xrQ16
|
||||
vshr.s32 q9, q9, #5 @ xrQ16
|
||||
vshr.s32 q12, q12, #5 @ xiQ16
|
||||
vshr.s32 q13, q13, #5 @ xiQ16
|
||||
|
||||
vmull.s16 q10, d0, d7 @ kCosTab2[k] * inreQ7[k]
|
||||
vmull.s16 q11, d1, d6 @ kCosTab2[k] * inreQ7[k]
|
||||
vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[k]
|
||||
vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[k]
|
||||
|
||||
vmull.s16 q4, d0, d5 @ kCosTab2[k] * inimQ7[]
|
||||
vmull.s16 q5, d1, d4 @ kCosTab2[k] * inimQ7[]
|
||||
vmull.s16 q0, d2, d7 @ kSinTab2[k] * inreQ7[]
|
||||
vmull.s16 q2, d3, d6 @ kSinTab2[k] * inreQ7[]
|
||||
|
||||
vsub.s32 q14, q14, q10 @ kSinTab2[k] * inimQ7[k] -kCosTab2[k] * inreQ7[k]
|
||||
vsub.s32 q15, q15, q11 @ kSinTab2[k] * inimQ7[k] -kCosTab2[k] * inreQ7[k]
|
||||
vadd.s32 q10, q4, q0 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
|
||||
vadd.s32 q11, q5, q2 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[]
|
||||
|
||||
vshr.s32 q14, q14, #5 @ yiQ16
|
||||
vshr.s32 q15, q15, #5 @ yiQ16
|
||||
|
||||
@ -118,8 +410,8 @@ TRANSFORM_AND_FIND_MAX:
|
||||
vadd.s32 q5, q11, q13
|
||||
|
||||
@ yrQ16 - xiQ16
|
||||
vsub.s32 q9, q10, q12
|
||||
vsub.s32 q8, q11, q13
|
||||
vsub.s32 q9, q10, q12
|
||||
|
||||
@ Reverse the order of the samples
|
||||
vrev64.32 q2, q2
|
||||
@ -128,33 +420,37 @@ TRANSFORM_AND_FIND_MAX:
|
||||
vrev64.32 q9, q9
|
||||
vswp d4, d5
|
||||
vswp d6, d7
|
||||
vswp d16, d17
|
||||
vswp d18, d19
|
||||
|
||||
vst1.32 {q0, q1}, [r2]! @ outre1Q16[k]
|
||||
vst1.32 {q2, q3}, [r4], r5 @ outre1Q16[FRAMESAMPLES/2 - 1 - k]
|
||||
vst1.32 {q4, q5}, [r3]! @ outre2Q16[k]
|
||||
vst1.32 {q8, q9}, [r6], r5 @ outre2Q16[FRAMESAMPLES/2 - 1 - k]
|
||||
vswp d16, d17
|
||||
vswp d18, d19
|
||||
vst1.32 {q2, q3}, [r4], r5 @ outre1Q16[FRAMESAMPLES / 2 - 1 - k]
|
||||
|
||||
@ Find the absolute maximum in the vectors and store them in q6 and q7.
|
||||
vabs.s32 q10, q0
|
||||
vabs.s32 q11, q1
|
||||
vabs.s32 q12, q2
|
||||
vabs.s32 q13, q3
|
||||
vabs.s32 q14, q4
|
||||
vmax.u32 q6, q10 @ Use u32 so we don't lose the value 0x80000000.
|
||||
vmax.u32 q7, q14 @ Maximum for outre2Q16[].
|
||||
vabs.s32 q11, q1
|
||||
vabs.s32 q15, q5
|
||||
vmax.u32 q6, q11 @ Maximum for outre1Q16[].
|
||||
vmax.u32 q7, q15
|
||||
vabs.s32 q12, q2
|
||||
vmax.u32 q6, q10 @ Use u32 so we don't lose the value 0x80000000.
|
||||
vmax.u32 q7, q14 @ Maximum for outre2Q16[].
|
||||
vabs.s32 q0, q8
|
||||
vmax.u32 q6, q11 @ Maximum for outre1Q16[].
|
||||
vmax.u32 q7, q15
|
||||
vabs.s32 q13, q3
|
||||
vmax.u32 q6, q12
|
||||
vmax.u32 q7, q0
|
||||
vabs.s32 q1, q9
|
||||
vst1.32 {q4, q5}, [r3]! @ outre2Q16[k]
|
||||
vst1.32 {q8, q9}, [r6], r5 @ outre2Q16[FRAMESAMPLES / 2 - 1 - k]
|
||||
vmax.u32 q6, q13
|
||||
vmax.u32 q7, q1
|
||||
|
||||
bgt TRANSFORM_AND_FIND_MAX
|
||||
bgt TransformAndFindMax
|
||||
|
||||
adr r10, kSinTab1
|
||||
mov r2, #(kSinTab1 - kCosTab1)
|
||||
sub r9, r10, r2 @ kCosTab1
|
||||
|
||||
@ Find the maximum value in the Neon registers
|
||||
vmax.u32 d12, d13
|
||||
@ -174,18 +470,16 @@ TRANSFORM_AND_FIND_MAX:
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 2)
|
||||
|
||||
PRE_FFT_SHIFT:
|
||||
PreFftShift:
|
||||
subs r8, #16
|
||||
vld1.32 {q0, q1}, [r6]! @ outre1Q16[]
|
||||
vld1.32 {q2, q3}, [r6]! @ outre1Q16[]
|
||||
vld1.32 {q4, q5}, [r7]! @ outre2Q16[]
|
||||
vld1.32 {q6, q7}, [r7]! @ outre2Q16[]
|
||||
|
||||
subs r8, #16
|
||||
|
||||
vrshl.s32 q0, q0, q8
|
||||
vrshl.s32 q1, q1, q8
|
||||
vrshl.s32 q2, q2, q8
|
||||
vrshl.s32 q3, q3, q8
|
||||
vld1.32 {q4, q5}, [r7]! @ outre2Q16[]
|
||||
vld1.32 {q6, q7}, [r7]! @ outre2Q16[]
|
||||
vrshl.s32 q4, q4, q8
|
||||
vrshl.s32 q5, q5, q8
|
||||
vrshl.s32 q6, q6, q8
|
||||
@ -203,12 +497,12 @@ PRE_FFT_SHIFT:
|
||||
vst1.16 {q0, q1}, [r4]! @ inreQ7[]
|
||||
vst1.16 {q2, q3}, [r5]! @ inimQ7[]
|
||||
|
||||
bgt PRE_FFT_SHIFT
|
||||
bgt PreFftShift
|
||||
|
||||
ldr r0, [sp] @ inreQ7
|
||||
ldr r1, [sp, #4] @ inimQ7
|
||||
mov r2, #1
|
||||
bl WebRtcIsacfix_FftRadix16Fastest(PLT)
|
||||
CALL_FUNCTION WebRtcIsacfix_FftRadix16Fastest
|
||||
|
||||
ldr r4, [sp] @ inreQ7
|
||||
ldr r5, [sp, #4] @ inimQ7
|
||||
@ -217,93 +511,67 @@ PRE_FFT_SHIFT:
|
||||
mov r8, #(FRAMESAMPLES / 2)
|
||||
vneg.s32 q5, q8 @ -sh
|
||||
movw r0, #273
|
||||
vdup.s32 d8, r0
|
||||
|
||||
POST_FFT_SHIFT_DIVIDE:
|
||||
vld1.16 {q0, q1}, [r4]! @ inreQ7
|
||||
vld1.16 {q2, q3}, [r5]! @ inimQ7
|
||||
lsl r0, #15 @ Together with vqdmulh, net effect is ">> 16".
|
||||
vdup.s32 q4, r0
|
||||
|
||||
PostFftShiftDivide:
|
||||
subs r8, #16
|
||||
|
||||
vld1.16 {q0, q1}, [r4]! @ inreQ7
|
||||
vmovl.s16 q6, d0
|
||||
vmovl.s16 q7, d1
|
||||
vld1.16 {q2, q3}, [r5]! @ inimQ7
|
||||
vmovl.s16 q8, d2
|
||||
vmovl.s16 q9, d3
|
||||
vmovl.s16 q0, d4
|
||||
vmovl.s16 q1, d5
|
||||
vmovl.s16 q2, d6
|
||||
vmovl.s16 q3, d7
|
||||
|
||||
vshl.s32 q6, q6, q5
|
||||
vshl.s32 q7, q7, q5
|
||||
vshl.s32 q8, q8, q5
|
||||
vshl.s32 q9, q9, q5
|
||||
|
||||
vqdmulh.s32 q6, q6, q4
|
||||
vqdmulh.s32 q7, q7, q4
|
||||
vqdmulh.s32 q8, q8, q4
|
||||
vqdmulh.s32 q9, q9, q4
|
||||
|
||||
vmovl.s16 q0, d4
|
||||
vmovl.s16 q1, d5
|
||||
vmovl.s16 q2, d6
|
||||
vmovl.s16 q3, d7
|
||||
|
||||
vshl.s32 q0, q0, q5
|
||||
vshl.s32 q1, q1, q5
|
||||
vshl.s32 q2, q2, q5
|
||||
vshl.s32 q3, q3, q5
|
||||
|
||||
@ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre1Q16[k])
|
||||
vmull.s32 q10, d12, d8
|
||||
vmull.s32 q11, d13, d8
|
||||
vmull.s32 q12, d14, d8
|
||||
vmull.s32 q13, d15, d8
|
||||
vshrn.s64 d12, q10, #16
|
||||
vshrn.s64 d13, q11, #16
|
||||
vshrn.s64 d14, q12, #16
|
||||
vshrn.s64 d15, q13, #16
|
||||
|
||||
vmull.s32 q10, d16, d8
|
||||
vmull.s32 q11, d17, d8
|
||||
vmull.s32 q12, d18, d8
|
||||
vmull.s32 q13, d19, d8
|
||||
vshrn.s64 d16, q10, #16
|
||||
vshrn.s64 d17, q11, #16
|
||||
vshrn.s64 d18, q12, #16
|
||||
vshrn.s64 d19, q13, #16
|
||||
|
||||
@ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre2Q16[k])
|
||||
vmull.s32 q10, d0, d8
|
||||
vmull.s32 q11, d1, d8
|
||||
vmull.s32 q12, d2, d8
|
||||
vmull.s32 q13, d3, d8
|
||||
vshrn.s64 d0, q10, #16
|
||||
vshrn.s64 d1, q11, #16
|
||||
vshrn.s64 d2, q12, #16
|
||||
vshrn.s64 d3, q13, #16
|
||||
|
||||
vmull.s32 q10, d4, d8
|
||||
vmull.s32 q11, d5, d8
|
||||
vmull.s32 q12, d6, d8
|
||||
vmull.s32 q13, d7, d8
|
||||
vshrn.s64 d4, q10, #16
|
||||
vshrn.s64 d5, q11, #16
|
||||
vshrn.s64 d6, q12, #16
|
||||
vshrn.s64 d7, q13, #16
|
||||
|
||||
vqdmulh.s32 q0, q0, q4
|
||||
vqdmulh.s32 q1, q1, q4
|
||||
vst1.32 {q6, q7}, [r6]! @ outre1Q16[]
|
||||
vqdmulh.s32 q2, q2, q4
|
||||
vqdmulh.s32 q3, q3, q4
|
||||
vst1.32 {q8, q9}, [r6]! @ outre1Q16[]
|
||||
vst1.32 {q0, q1}, [r7]! @ outre2Q16[]
|
||||
vst1.32 {q2, q3}, [r7]! @ outre2Q16[]
|
||||
|
||||
bgt POST_FFT_SHIFT_DIVIDE
|
||||
bgt PostFftShiftDivide
|
||||
|
||||
mov r8, #(FRAMESAMPLES / 2)
|
||||
ldr r9, =kCosTab1
|
||||
ldr r10, =kSinTab1
|
||||
ldr r2, [sp, #8] @ outre1Q16
|
||||
ldr r3, [sp, #12] @ outre2Q16
|
||||
movw r0, #31727
|
||||
lsl r0, #16 @ With vqdmulh and vrshrn, net effect is ">> 25".
|
||||
|
||||
DEMODULATE_AND_SEPARATE:
|
||||
vld1.16 {q0}, [r9]! @ kCosTab1[]
|
||||
vld1.16 {q1}, [r10]! @ kSinTab1[]
|
||||
vld1.32 {q2, q3}, [r2] @ outre1Q16
|
||||
vld1.32 {q4, q5}, [r3] @ outre2Q16
|
||||
DemodulateAndSeparate:
|
||||
subs r8, #8
|
||||
|
||||
vld1.16 {q0}, [r9:64]! @ kCosTab1[]
|
||||
vmovl.s16 q6, d0 @ kCosTab1[]
|
||||
vld1.16 {q1}, [r10:64]! @ kSinTab1[]
|
||||
vmovl.s16 q7, d1 @ kCosTab1[]
|
||||
vld1.32 {q2, q3}, [r2] @ outre1Q16
|
||||
vmovl.s16 q8, d2 @ kSinTab1[]
|
||||
vld1.32 {q4, q5}, [r3] @ outre2Q16
|
||||
vmovl.s16 q9, d3 @ kSinTab1[]
|
||||
|
||||
vmull.s32 q10, d12, d4 @ kCosTab1[k] * outre1Q16[k]
|
||||
@ -311,72 +579,47 @@ DEMODULATE_AND_SEPARATE:
|
||||
vmull.s32 q12, d14, d6 @ kCosTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q13, d15, d7 @ kCosTab1[k] * outre1Q16[k]
|
||||
|
||||
vmull.s32 q0, d16, d8 @ kSinTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q1, d17, d9 @ kSinTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q14, d18, d10 @ kSinTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q15, d19, d11 @ kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q10, d16, d8 @ -= kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q11, d17, d9 @ -= kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q12, d18, d10 @ -= kSinTab1[k] * outre2Q16[k]
|
||||
vmlsl.s32 q13, d19, d11 @ -= kSinTab1[k] * outre2Q16[k]
|
||||
|
||||
vsub.s64 q10, q10, q0
|
||||
vsub.s64 q11, q11, q1
|
||||
vsub.s64 q12, q12, q14
|
||||
vsub.s64 q13, q13, q15
|
||||
|
||||
vrshrn.s64 d20, q10, #14 @ xrQ16
|
||||
vrshrn.s64 d21, q11, #14 @ xrQ16
|
||||
vrshrn.s64 d22, q12, #14 @ xrQ16
|
||||
vrshrn.s64 d23, q13, #14 @ xrQ16
|
||||
|
||||
subs r8, #8
|
||||
vrshrn.s64 d20, q10, #10 @ xrQ16
|
||||
vrshrn.s64 d21, q11, #10 @ xrQ16
|
||||
vrshrn.s64 d22, q12, #10 @ xrQ16
|
||||
vrshrn.s64 d23, q13, #10 @ xrQ16
|
||||
|
||||
vmull.s32 q12, d12, d8 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q13, d13, d9 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q14, d14, d10 @ kCosTab1[k] * outre2Q16[k]
|
||||
vmull.s32 q15, d15, d11 @ kCosTab1[k] * outre2Q16[k]
|
||||
|
||||
vdup.s32 d9, r0 @ generic -> Neon doesn't cost extra cycles.
|
||||
vmlal.s32 q12, d16, d4 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q13, d17, d5 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q14, d18, d6 @ += kSinTab1[k] * outre1Q16[k]
|
||||
vmlal.s32 q15, d19, d7 @ += kSinTab1[k] * outre1Q16[k]
|
||||
|
||||
vmull.s32 q0, d16, d4 @ kSinTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q1, d17, d5 @ kSinTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q6, d18, d6 @ kSinTab1[k] * outre1Q16[k]
|
||||
vmull.s32 q7, d19, d7 @ kSinTab1[k] * outre1Q16[k]
|
||||
vdup.s32 q4, r0 @ generic -> Neon doesn't cost extra cycles.
|
||||
|
||||
vadd.s64 q12, q12, q0
|
||||
vadd.s64 q13, q13, q1
|
||||
vadd.s64 q14, q14, q6
|
||||
vadd.s64 q15, q15, q7
|
||||
|
||||
vrshrn.s64 d24, q12, #14 @ xiQ16
|
||||
vrshrn.s64 d25, q13, #14 @ xiQ16
|
||||
vrshrn.s64 d26, q14, #14 @ xiQ16
|
||||
vrshrn.s64 d27, q15, #14 @ xiQ16
|
||||
vrshrn.s64 d24, q12, #10 @ xiQ16
|
||||
vrshrn.s64 d25, q13, #10 @ xiQ16
|
||||
vrshrn.s64 d26, q14, #10 @ xiQ16
|
||||
vrshrn.s64 d27, q15, #10 @ xiQ16
|
||||
|
||||
@ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16)
|
||||
vmull.s32 q0, d20, d9
|
||||
vmull.s32 q1, d21, d9
|
||||
vmull.s32 q2, d22, d9
|
||||
vmull.s32 q3, d23, d9
|
||||
|
||||
vrshrn.s64 d0, q0, #11
|
||||
vrshrn.s64 d1, q1, #11
|
||||
vrshrn.s64 d2, q2, #11
|
||||
vrshrn.s64 d3, q3, #11
|
||||
|
||||
@ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xiQ16)
|
||||
vmull.s32 q6, d24, d9
|
||||
vmull.s32 q7, d25, d9
|
||||
vmull.s32 q8, d26, d9
|
||||
vmull.s32 q9, d27, d9
|
||||
|
||||
vrshrn.s64 d4, q6, #11
|
||||
vrshrn.s64 d5, q7, #11
|
||||
vrshrn.s64 d6, q8, #11
|
||||
vrshrn.s64 d7, q9, #11
|
||||
vqdmulh.s32 q0, q10, q4
|
||||
vqdmulh.s32 q1, q11, q4
|
||||
vqdmulh.s32 q2, q12, q4
|
||||
vqdmulh.s32 q3, q13, q4
|
||||
|
||||
vst1.16 {q0, q1}, [r2]! @ outre1Q16[]
|
||||
vst1.16 {q2, q3}, [r3]! @ outre2Q16[]
|
||||
|
||||
bgt DEMODULATE_AND_SEPARATE
|
||||
bgt DemodulateAndSeparate
|
||||
|
||||
add sp, sp, #16
|
||||
vpop {q4-q7}
|
||||
pop {r4-r11,pc}
|
||||
|
||||
|
@ -11,89 +11,152 @@
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
|
||||
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
|
||||
|
||||
// Length of every input, output and expected-value buffer used by the
// transform tests in this file: half of FRAMESAMPLES.
static const int kSamples = FRAMESAMPLES/2;
|
||||
static int32_t spec2time_out_expected_1[kSamples] = {-3366470, -2285227,
|
||||
-3415765, -2310215, -3118030, -2222470, -3030254, -2192091, -3423170,
|
||||
-2216041, -3305541, -2171936, -3195767, -2095779, -3153304, -2157560,
|
||||
-3071167, -2032108, -3101190, -1972016, -3103824, -2089118, -3139811,
|
||||
-1898337, -3102801, -2055082, -3029665, -1854140, -2962586, -1966454,
|
||||
-3071167, -1894588, -2851743, -1917315, -2848087, -1594932, -2799242,
|
||||
-1462184, -2845887, -1437599, -2691776, -1329637, -2770659, -1268491,
|
||||
-2625161, -1578991, -2460299, -1186385, -2365613, -1039354, -2322608,
|
||||
-958518, -2271749, -789860, -2254538, -850308, -2384436, -850959, -2133734,
|
||||
-587678, -2093316, -495115, -1973364, -475177, -1801282, -173507,
|
||||
-1848516, -158015, -1792018, -62648, -1643313, 214746, -1500758, 267077,
|
||||
-1450193, 560521, -1521579, 675283, -1345408, 857559, -1300822, 1116332,
|
||||
-1294533, 1241117, -1070027, 1263503, -983816, 1529821, -1019586,
|
||||
1910421, -955420, 2073688, -836459, 2401105, -653905, 2690474, -731425,
|
||||
2930131, -935234, 3299500, -875978, 3523432, -878906, 3924822, -1081630,
|
||||
4561267, -1203023, 5105274, -1510983, 6052762, -2294646, 7021597,
|
||||
-3108053, 8826736, -4935222, 11678789, -8442713, 18725700, -21526692,
|
||||
25420577, 19589811, -28108666, 12634054, -14483066, 6263217, -9979706,
|
||||
3665661, -7909736, 2531530, -6434896, 1700772, -5525393, 1479473,
|
||||
-4894262, 1231760, -4353044, 1032940, -3786590, 941152, -3331614,
|
||||
665090, -2851619, 830696, -2762201, 958007, -2483118, 788233, -2184965,
|
||||
804825, -1967306, 1007255, -1862474, 920889, -1457506, 755406, -1405841,
|
||||
890230, -1302124, 1161599, -701867, 1154163, -1083366, 1204743, -513581,
|
||||
1547264, -650636, 1493384, -285543, 1771863, -277906, 1841343, -9078,
|
||||
1751863, 230222, 1819578, 207170, 1978972, 398137, 2106468, 552155,
|
||||
1997624, 685213, 2129520, 601078, 2238736, 944591, 2441879, 1194178,
|
||||
2355280, 986124, 2393328, 1049005, 2417944, 1208368, 2489516, 1352023,
|
||||
2572118, 1445283, 2856081, 1532997, 2742279, 1615877, 2915274, 1808036,
|
||||
2856871, 1806936, 3241747, 1622461, 2978558, 1841297, 3010378, 1923666,
|
||||
3271367, 2126700, 3070935, 1956958, 3107588, 2128405, 3288872, 2114911,
|
||||
3315952, 2406651, 3344038, 2370199, 3368980, 2144361, 3305030, 2183803,
|
||||
3401450, 2523102, 3405463, 2452475, 3463355, 2421678, 3551968, 2431949,
|
||||
3477251, 2148125, 3244489, 2174090};
|
||||
static int32_t spec2time_out_expected_2[kSamples]= {1691694, -2499988, -2035547,
|
||||
1060469, 988634, -2044502, -306271, 2041000, 201454, -2289456, 93694,
|
||||
2129427, -369152, -1887834, 860796, 2089102, -929424, -1673956, 1395291,
|
||||
1785651, -1619673, -1380109, 1963449, 1093311, -2111007, -840456,
|
||||
2372786, 578119, -2242702, 89774, 2463304, -132717, -2121480, 643634,
|
||||
2277636, -1125999, -1995858, 1543748, 2227861, -1483779, -1495491,
|
||||
2102642, 1833876, -1920568, -958378, 2485101, 772261, -2454257, -24942,
|
||||
2918714, 136838, -2500453, 816118, 3039735, -746560, -2365815, 1586396,
|
||||
2714951, -1511696, -1942334, 2571792, 2182827, -2325335, -1311543,
|
||||
3055970, 1367220, -2737182, -110626, 3889222, 631008, -3280879, 853066,
|
||||
4122279, -706638, -3334449, 2148311, 3993512, -1846301, -3004894,
|
||||
3426779, 3329522, -3165264, -2242423, 4756866, 2557711, -4131280,
|
||||
-805259, 5702711, 1120592, -4852821, 743664, 6476444, -621186, -5465828,
|
||||
2815787, 6768835, -3017442, -5338409, 5658126, 6838454, -5492288,
|
||||
-4682382, 8874947, 6153814, -8832561, -2649251, 12817398, 4237692,
|
||||
-13000247, 1190661, 18986363, -115738, -19693978, 9908367, 30660381,
|
||||
-10632635, -37962068, 47022884, 89744622, -42087632, 40279224,
|
||||
-88869341, -47542383, 38572364, 10441576, -30339718, -9926740, 19896578,
|
||||
28009, -18886612, -1124047, 13232498, -4150304, -12770551, 2637074,
|
||||
9051831, -6162211, -8713972, 4557937, 5489716, -6862312, -5532349,
|
||||
5415449, 2791310, -6999367, -2790102, 5375806, 546222, -6486452,
|
||||
-821261, 4994973, -1278840, -5645501, 1060484, 3996285, -2503954,
|
||||
-4653629, 2220549, 3036977, -3282133, -3318585, 2780636, 1789880,
|
||||
-4004589, -2041031, 3105373, 574819, -3992722, -971004, 3001703,
|
||||
-676739, -3841508, 417284, 2897970, -1427018, -3058480, 1189948,
|
||||
2210960, -2268992, -2603272, 1949785, 1576172, -2720404, -1891738,
|
||||
2309456, 769178, -2975646, -707150, 2424652, -88039, -2966660, -65452,
|
||||
2320780, -957557, -2798978, 744640, 1879794, -1672081, -2365319,
|
||||
1253309, 1366383, -2204082, -1544367, 1801452, 613828, -2531994,
|
||||
-983847, 2064842, 118326, -2613790, -203220, 2219635, -730341, -2641861,
|
||||
563557, 1765434, -1329916, -2272927, 1037138, 1266725, -1939220,
|
||||
-1588643, 1754528, 816552, -2376303, -1099167, 1864999, 122477,
|
||||
-2422762, -400027, 1889228, -579916, -2490353, 287139, 2011318,
|
||||
-1176657, -2502978, 812896, 1116502, -1940211};
|
||||
static int16_t time2spec_out_expected_1[kSamples]= {20342, 23889, -10063, -9419,
|
||||
3242, 7280, -2012, -5029, 332, 4478, -97, -3244, -891, 3117, 773, -2204,
|
||||
-1335, 2009, 1236, -1469, -1562, 1277, 1366, -815, -1619, 599, 1449, -177,
|
||||
-1507, 116, 1294, 263, -1338, -244, 1059, 553, -1045, -549, 829, 826,
|
||||
-731, -755, 516, 909, -427, -853, 189, 1004, -184, -828, -108, 888, 72,
|
||||
-700, -280, 717, 342, -611, -534, 601, 534, -374, -646, 399, 567, -171,
|
||||
-720, 234, 645, -11, -712, -26, 593, 215, -643, -172, 536, 361, -527,
|
||||
-403, 388, 550, -361, -480, 208, 623, -206, -585, 41, 578, 12, -504,
|
||||
-182, 583, 218, -437, -339, 499, 263, -354, -450, 347, 456, -193, -524,
|
||||
212, 475, -74, -566, 94, 511, 112, -577, -201, 408, 217, -546, -295, 338,
|
||||
387, -13, 4, -46, 2, -76, 103, -83, 108, -55, 100, -150, 131, -156, 141,
|
||||
-171, 179, -190, 128, -227, 172, -214, 215, -189, 265, -244, 322, -335,
|
||||
337, -352, 358, -368, 362, -355, 366, -381, 403, -395, 411, -392, 446,
|
||||
-458, 504, -449, 507, -464, 452, -491, 481, -534, 486, -516, 560, -535,
|
||||
525, -537, 559, -554, 570, -616, 591, -585, 627, -509, 588, -584, 547,
|
||||
-610, 580, -614, 635, -620, 655, -554, 546, -591, 642, -590, 660, -656,
|
||||
629, -604, 620, -580, 617, -645, 648, -573, 612, -604, 584, -571, 597,
|
||||
-562, 627, -550, 560, -606, 529, -584, 568, -503, 532, -463, 512, -440,
|
||||
399, -457, 437, -349, 278, -317, 257, -220, 163, -8, -61, 18, -161, 367,
|
||||
-1306};
|
||||
static int16_t time2spec_out_expected_2[kSamples]= {14283, -11552, -15335, 6626,
|
||||
7554, -2150, -6309, 1307, 4523, -4, -3908, -314, 3001, 914, -2715, -1042,
|
||||
2094, 1272, -1715, -1399, 1263, 1508, -1021, -1534, 735, 1595, -439, -1447,
|
||||
155, 1433, 22, -1325, -268, 1205, 424, -1030, -608, 950, 643, -733, -787,
|
||||
661, 861, -502, -888, 331, 852, -144, -849, 19, 833, 99, -826, -154,
|
||||
771, 368, -735, -459, 645, 513, -491, -604, 431, 630, -314, -598, 183,
|
||||
622, -78, -612, -48, 641, 154, -645, -257, 610, 281, -529, -444, 450,
|
||||
441, -327, -506, 274, 476, -232, -570, 117, 554, -86, -531, -21, 572,
|
||||
151, -606, -221, 496, 322, -407, -388, 407, 394, -268, -428, 280, 505,
|
||||
-115, -588, 19, 513, -29, -539, -109, 468, 173, -501, -242, 442, 278,
|
||||
-478, -680, 656, -659, 656, -669, 602, -688, 612, -667, 612, -642, 627,
|
||||
-648, 653, -676, 596, -680, 655, -649, 678, -672, 587, -608, 637, -645,
|
||||
637, -620, 556, -580, 553, -635, 518, -599, 583, -501, 536, -544, 473,
|
||||
-552, 583, -511, 541, -532, 563, -486, 461, -453, 486, -388, 424, -416,
|
||||
432, -374, 399, -462, 364, -346, 293, -329, 331, -313, 281, -247, 309,
|
||||
-337, 241, -190, 207, -194, 179, -163, 155, -156, 117, -135, 107, -126,
|
||||
29, -22, 81, -8, 17, -61, -10, 8, -37, 80, -44, 72, -88, 65, -89, 130,
|
||||
-114, 181, -215, 189, -245, 260, -288, 294, -339, 344, -396, 407, -429,
|
||||
438, -439, 485, -556, 629, -612, 637, -645, 661, -737, 829, -830, 831,
|
||||
-1041};
|
||||
|
||||
class TransformTest : public testing::Test {
|
||||
protected:
|
||||
// Fixture constructor: calls WebRtcSpl_Init() before the test body runs.
// NOTE(review): presumably this initializes the signal-processing library
// that the transform functions depend on -- confirm against its declaration.
TransformTest() {
|
||||
WebRtcSpl_Init();
|
||||
}
|
||||
|
||||
// Runs |Time2SpecFunction| on a fixed, deterministic input and checks both
// outputs against the reference tables time2spec_out_expected_1 and
// time2spec_out_expected_2. The implementation is passed in as a function
// pointer so the same check can be applied to each variant of the transform
// (see Time2SpecTest).
|
||||
void Time2SpecTester(Time2Spec Time2SpecFunction) {
|
||||
// WebRtcIsacfix_Time2Spec functions hard coded the buffer lengths. It's a
|
||||
// large buffer but we have to test it here.
|
||||
int16_t data_in_1[kSamples] = {0};
|
||||
int16_t data_in_2[kSamples] = {0};
|
||||
int16_t data_out_1[kSamples] = {0};
|
||||
int16_t data_out_2[kSamples] = {0};
|
||||
|
||||
// Fill both inputs with a deterministic pattern derived from the index.
// NOTE(review): i * i + 1777 is computed as int and stored into an int16_t;
// if kSamples is large enough for the value to leave the int16_t range it is
// narrowed on assignment. The expected tables presumably already reflect
// that -- confirm before changing the pattern.
for(int i = 0; i < kSamples; i++) {
|
||||
data_in_1[i] = i * i + 1777;
|
||||
data_in_2[i] = WEBRTC_SPL_WORD16_MAX / (i + 1) + 17;
|
||||
}
|
||||
|
||||
// Arguments are passed as (input 1, input 2, output 1, output 2).
Time2SpecFunction(data_in_1, data_in_2, data_out_1, data_out_2);
|
||||
|
||||
for (int i = 0; i < kSamples; i++) {
|
||||
// Each output sample may differ from its reference value by at most 1.
// We don't require bit-exact for ARM assembly code.
|
||||
EXPECT_LE(abs(time2spec_out_expected_1[i] - data_out_1[i]), 1);
|
||||
EXPECT_LE(abs(time2spec_out_expected_2[i] - data_out_2[i]), 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass a function pointer to the Tester function.
|
||||
void Spec2TimeTester(Spec2Time Spec2TimeFunction) {
|
||||
// WebRtcIsacfix_Spec2Time functions hard coded the buffer lengths. It's a
|
||||
// large buffer but we have to test it here.
|
||||
const int kSamples = FRAMESAMPLES/2;
|
||||
int16_t data_in_1[kSamples] = {0};
|
||||
int16_t data_in_2[kSamples] = {0};
|
||||
int32_t data_out_1[kSamples] = {0};
|
||||
int32_t data_out_2[kSamples] = {0};
|
||||
int32_t out_expected_1[kSamples]= {-3366470, -2285227, -3415765,
|
||||
-2310215, -3118030, -2222470, -3030254, -2192091, -3423170, -2216041,
|
||||
-3305541, -2171936, -3195767, -2095779, -3153304, -2157560, -3071167,
|
||||
-2032108, -3101190, -1972016, -3103824, -2089118, -3139811, -1898337,
|
||||
-3102801, -2055082, -3029665, -1854140, -2962586, -1966454, -3071167,
|
||||
-1894588, -2851743, -1917315, -2848087, -1594932, -2799242, -1462184,
|
||||
-2845887, -1437599, -2691776, -1329637, -2770659, -1268491, -2625161,
|
||||
-1578991, -2460299, -1186385, -2365613, -1039354, -2322608, -958518,
|
||||
-2271749, -789860, -2254538, -850308, -2384436, -850959, -2133734,
|
||||
-587678, -2093316, -495115, -1973364, -475177, -1801282, -173507,
|
||||
-1848516, -158015, -1792018, -62648, -1643313, 214746, -1500758, 267077,
|
||||
-1450193, 560521, -1521579, 675283, -1345408, 857559, -1300822, 1116332,
|
||||
-1294533, 1241117, -1070027, 1263503, -983816, 1529821, -1019586,
|
||||
1910421, -955420, 2073688, -836459, 2401105, -653905, 2690474, -731425,
|
||||
2930131, -935234, 3299500, -875978, 3523432, -878906, 3924822, -1081630,
|
||||
4561267, -1203023, 5105274, -1510983, 6052762, -2294646, 7021597,
|
||||
-3108053, 8826736, -4935222, 11678789, -8442713, 18725700, -21526692,
|
||||
25420577, 19589811, -28108666, 12634054, -14483066, 6263217, -9979706,
|
||||
3665661, -7909736, 2531530, -6434896, 1700772, -5525393, 1479473,
|
||||
-4894262, 1231760, -4353044, 1032940, -3786590, 941152, -3331614,
|
||||
665090, -2851619, 830696, -2762201, 958007, -2483118, 788233, -2184965,
|
||||
804825, -1967306, 1007255, -1862474, 920889, -1457506, 755406, -1405841,
|
||||
890230, -1302124, 1161599, -701867, 1154163, -1083366, 1204743, -513581,
|
||||
1547264, -650636, 1493384, -285543, 1771863, -277906, 1841343, -9078,
|
||||
1751863, 230222, 1819578, 207170, 1978972, 398137, 2106468, 552155,
|
||||
1997624, 685213, 2129520, 601078, 2238736, 944591, 2441879, 1194178,
|
||||
2355280, 986124, 2393328, 1049005, 2417944, 1208368, 2489516, 1352023,
|
||||
2572118, 1445283, 2856081, 1532997, 2742279, 1615877, 2915274, 1808036,
|
||||
2856871, 1806936, 3241747, 1622461, 2978558, 1841297, 3010378, 1923666,
|
||||
3271367, 2126700, 3070935, 1956958, 3107588, 2128405, 3288872, 2114911,
|
||||
3315952, 2406651, 3344038, 2370199, 3368980, 2144361, 3305030, 2183803,
|
||||
3401450, 2523102, 3405463, 2452475, 3463355, 2421678, 3551968, 2431949,
|
||||
3477251, 2148125, 3244489, 2174090};
|
||||
int32_t out_expected_2[kSamples]= {1691694, -2499988, -2035547,
|
||||
1060469, 988634, -2044502, -306271, 2041000, 201454, -2289456, 93694,
|
||||
2129427, -369152, -1887834, 860796, 2089102, -929424, -1673956, 1395291,
|
||||
1785651, -1619673, -1380109, 1963449, 1093311, -2111007, -840456,
|
||||
2372786, 578119, -2242702, 89774, 2463304, -132717, -2121480, 643634,
|
||||
2277636, -1125999, -1995858, 1543748, 2227861, -1483779, -1495491,
|
||||
2102642, 1833876, -1920568, -958378, 2485101, 772261, -2454257, -24942,
|
||||
2918714, 136838, -2500453, 816118, 3039735, -746560, -2365815, 1586396,
|
||||
2714951, -1511696, -1942334, 2571792, 2182827, -2325335, -1311543,
|
||||
3055970, 1367220, -2737182, -110626, 3889222, 631008, -3280879, 853066,
|
||||
4122279, -706638, -3334449, 2148311, 3993512, -1846301, -3004894,
|
||||
3426779, 3329522, -3165264, -2242423, 4756866, 2557711, -4131280,
|
||||
-805259, 5702711, 1120592, -4852821, 743664, 6476444, -621186, -5465828,
|
||||
2815787, 6768835, -3017442, -5338409, 5658126, 6838454, -5492288,
|
||||
-4682382, 8874947, 6153814, -8832561, -2649251, 12817398, 4237692,
|
||||
-13000247, 1190661, 18986363, -115738, -19693978, 9908367, 30660381,
|
||||
-10632635, -37962068, 47022884, 89744622, -42087632, 40279224,
|
||||
-88869341, -47542383, 38572364, 10441576, -30339718, -9926740, 19896578,
|
||||
28009, -18886612, -1124047, 13232498, -4150304, -12770551, 2637074,
|
||||
9051831, -6162211, -8713972, 4557937, 5489716, -6862312, -5532349,
|
||||
5415449, 2791310, -6999367, -2790102, 5375806, 546222, -6486452,
|
||||
-821261, 4994973, -1278840, -5645501, 1060484, 3996285, -2503954,
|
||||
-4653629, 2220549, 3036977, -3282133, -3318585, 2780636, 1789880,
|
||||
-4004589, -2041031, 3105373, 574819, -3992722, -971004, 3001703,
|
||||
-676739, -3841508, 417284, 2897970, -1427018, -3058480, 1189948,
|
||||
2210960, -2268992, -2603272, 1949785, 1576172, -2720404, -1891738,
|
||||
2309456, 769178, -2975646, -707150, 2424652, -88039, -2966660, -65452,
|
||||
2320780, -957557, -2798978, 744640, 1879794, -1672081, -2365319,
|
||||
1253309, 1366383, -2204082, -1544367, 1801452, 613828, -2531994,
|
||||
-983847, 2064842, 118326, -2613790, -203220, 2219635, -730341, -2641861,
|
||||
563557, 1765434, -1329916, -2272927, 1037138, 1266725, -1939220,
|
||||
-1588643, 1754528, 816552, -2376303, -1099167, 1864999, 122477,
|
||||
-2422762, -400027, 1889228, -579916, -2490353, 287139, 2011318,
|
||||
-1176657, -2502978, 812896, 1116502, -1940211};
|
||||
|
||||
for(int i = 0; i < kSamples; i++) {
|
||||
data_in_1[i] = i * i + 1777;
|
||||
data_in_2[i] = WEBRTC_SPL_WORD16_MAX / (i + 1) + 17;
|
||||
@ -103,12 +166,24 @@ class TransformTest : public testing::Test {
|
||||
|
||||
for (int i = 0; i < kSamples; i++) {
|
||||
// We don't require bit-exact for ARM assembly code.
|
||||
EXPECT_LE(abs(out_expected_1[i] - data_out_1[i]), 16);
|
||||
EXPECT_LE(abs(out_expected_2[i] - data_out_2[i]), 16);
|
||||
EXPECT_LE(abs(spec2time_out_expected_1[i] - data_out_1[i]), 16);
|
||||
EXPECT_LE(abs(spec2time_out_expected_2[i] - data_out_2[i]), 16);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Checks WebRtcIsacfix_Time2SpecC unconditionally, then checks
// WebRtcIsacfix_Time2SpecNeon when NEON is usable:
//  - with WEBRTC_DETECT_ARM_NEON defined, only if the run-time CPU feature
//    flags report kCPUFeatureNEON;
//  - otherwise, with WEBRTC_ARCH_ARM_NEON defined, always.
// When neither macro is defined only the C version is exercised.
TEST_F(TransformTest, Time2SpecTest) {
|
||||
Time2SpecTester(WebRtcIsacfix_Time2SpecC);
|
||||
#ifdef WEBRTC_DETECT_ARM_NEON
|
||||
// Run-time detection: skip the NEON variant on CPUs that lack it.
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
|
||||
Time2SpecTester(WebRtcIsacfix_Time2SpecNeon);
|
||||
}
|
||||
#elif defined(WEBRTC_ARCH_ARM_NEON)
|
||||
Time2SpecTester(WebRtcIsacfix_Time2SpecNeon);
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_F(TransformTest, Spec2TimeTest) {
|
||||
Spec2TimeTester(WebRtcIsacfix_Spec2TimeC);
|
||||
#ifdef WEBRTC_DETECT_ARM_NEON
|
||||
|
@ -24,6 +24,9 @@
|
||||
@ DEFINE_FUNCTION name: emits the label that starts function |name|, with an
@ underscore prepended to the symbol.
@ NOTE(review): the condition selecting this branch is outside the visible
@ hunk; presumably it is the platform whose C symbols carry a leading
@ underscore. Confirm against the enclosing #if.
.macro DEFINE_FUNCTION name
|
||||
_\name:
|
||||
.endm
|
||||
@ CALL_FUNCTION name: emits a branch-with-link (bl) to function |name|, using
@ the same underscore-prefixed symbol form as the DEFINE_FUNCTION macro in
@ this branch of the conditional.
.macro CALL_FUNCTION name
|
||||
bl _\name
|
||||
.endm
|
||||
#else
|
||||
.macro GLOBAL_FUNCTION name
|
||||
.global \name
|
||||
@ -31,6 +34,9 @@ _\name:
|
||||
@ DEFINE_FUNCTION name: emits the label that starts function |name|, using the
@ symbol exactly as given (no prefix). This is the #else counterpart of the
@ underscore-prefixed variant defined earlier in this file.
.macro DEFINE_FUNCTION name
|
||||
\name:
|
||||
.endm
|
||||
@ CALL_FUNCTION name: emits a branch-with-link (bl) to function |name|, using
@ the symbol exactly as given (no prefix), matching the DEFINE_FUNCTION macro
@ in this branch of the conditional.
.macro CALL_FUNCTION name
|
||||
bl \name
|
||||
.endm
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
Loading…
x
Reference in New Issue
Block a user