Ported assembly code in APM from Android to iOS.
Bugs=none Test=trybots, and offline file bit-exact tests. Review URL: https://webrtc-codereview.appspot.com/1066009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3563 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
0d8d010017
commit
2f9bd247ad
@ -32,6 +32,21 @@
|
||||
'variables': {
|
||||
'out_dir': '<(SHARED_INTERMEDIATE_DIR)/<(asm_header_dir)',
|
||||
'process_outputs_as_sources': 1,
|
||||
'conditions': [
|
||||
# We only support Android and iOS.
|
||||
['OS=="android"', {
|
||||
'compiler_to_use':
|
||||
'<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} <(android_toolchain)/*-gcc)',
|
||||
'compiler_options': '-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
|
||||
'pattern_to_detect': 'offset_',
|
||||
}],
|
||||
['OS=="ios"', {
|
||||
'compiler_to_use': 'clang',
|
||||
'compiler_options':
|
||||
'-arch armv7 -I<(webrtc_root)/.. -isysroot $(SDKROOT) -S',
|
||||
'pattern_to_detect': '_offset_',
|
||||
}],
|
||||
]
|
||||
},
|
||||
'rules': [
|
||||
{
|
||||
@ -46,10 +61,9 @@
|
||||
'action': [
|
||||
'python',
|
||||
'<(webrtc_root)/build/generate_asm_header.py',
|
||||
'--compiler=<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} '
|
||||
'<(android_toolchain)/*-gcc)',
|
||||
# Compiler options.
|
||||
'--options=-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
|
||||
'--compiler=<(compiler_to_use)',
|
||||
'--options=<(compiler_options)',
|
||||
'--pattern=<(pattern_to_detect)',
|
||||
'--dir=<(out_dir)',
|
||||
'<(RULE_INPUT_PATH)',
|
||||
],
|
||||
|
@ -19,8 +19,9 @@ and writes them into header files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
|
||||
def main(argv):
|
||||
@ -44,6 +45,7 @@ def main(argv):
|
||||
# Set the shell command with the compiler and options inputs.
|
||||
compiler_command = (options.compiler + " " + options.options + " " +
|
||||
input_filename + " -o " + interim_filename)
|
||||
|
||||
# Run the shell command and generate the intermediate file.
|
||||
subprocess.check_call(compiler_command, shell=True)
|
||||
|
||||
@ -51,13 +53,19 @@ def main(argv):
|
||||
out_file = open(out_filename, 'w') # The output header file.
|
||||
|
||||
# Generate the output header file.
|
||||
for line in interim_file: # Iterate through all the lines in the input file.
|
||||
while True:
|
||||
line = interim_file.readline()
|
||||
if not line: break
|
||||
if line.startswith(options.pattern):
|
||||
out_file.write('#define ')
|
||||
out_file.write(line.split(':')[0]) # Write the constant name.
|
||||
out_file.write(' ')
|
||||
if line.find('.word') >= 0:
|
||||
out_file.write(line.split('.word')[1]) # Write the constant value.
|
||||
# Find name of the next constant and write to the output file.
|
||||
const_name = re.sub(r'^_', '', line.split(':')[0])
|
||||
out_file.write('#define %s ' % const_name)
|
||||
|
||||
# Find value of the constant we just found and write to the output file.
|
||||
line = interim_file.readline()
|
||||
const_value = filter(str.isdigit, line.split(' ')[0])
|
||||
if const_value != '':
|
||||
out_file.write('%s\n' % const_value)
|
||||
|
||||
interim_file.close()
|
||||
out_file.close()
|
||||
|
@ -60,12 +60,12 @@ LOOP_PART_LEN:
|
||||
bgt LOOP_PART_LEN
|
||||
|
||||
@ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
|
||||
ldr r12, =offset_aecm_real_fft
|
||||
movw r12, #offset_aecm_real_fft
|
||||
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
|
||||
mov r2, r3 @ freq_signal
|
||||
mov r4, r3
|
||||
ldr r0, [r0, r12] @ aecm->real_fft
|
||||
bl WebRtcSpl_RealForwardFFTNeon
|
||||
CALL_FUNCTION WebRtcSpl_RealForwardFFTNeon
|
||||
|
||||
mov r12, #(PART_LEN * 2 / 16) @ Loop counter, unrolled by 16.
|
||||
|
||||
@ -119,15 +119,15 @@ LOOP_PRE_IFFT:
|
||||
str r8, [r4, r3]
|
||||
|
||||
@ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
|
||||
ldr r12, =offset_aecm_real_fft
|
||||
movw r12, #offset_aecm_real_fft
|
||||
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
|
||||
sub r2, #(PART_LEN * 4) @ Get r2 back to &efw[0].
|
||||
mov r4, r2 @ Keep efw in r4.
|
||||
ldr r0, [r0, r12] @ aecm->real_fft
|
||||
bl WebRtcSpl_RealInverseFFTNeon
|
||||
CALL_FUNCTION WebRtcSpl_RealInverseFFTNeon
|
||||
|
||||
ldr r6, =offset_aecm_outBuf
|
||||
ldr r12, =offset_aecm_dfaCleanQDomain
|
||||
movw r6, #offset_aecm_outBuf
|
||||
movw r12, #offset_aecm_dfaCleanQDomain
|
||||
ldr r8, [r5, r6] @ &aecm->outBuf[0]
|
||||
ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0]
|
||||
|
||||
@ -160,8 +160,8 @@ LOOP_POST_IFFT:
|
||||
vst1.16 d0, [r8, :64]! @ aecm->outBuf[i]
|
||||
bgt LOOP_POST_IFFT
|
||||
|
||||
ldr r3, =offset_aecm_xBuf
|
||||
ldr r12, =offset_aecm_dBufNoisy
|
||||
movw r3, #offset_aecm_xBuf
|
||||
movw r12, #offset_aecm_dBufNoisy
|
||||
ldr r3, [r5, r3] @ &aecm->xBuf[0]
|
||||
ldr r1, [r5, r12] @ &aecm->dBufNoisy[0]
|
||||
add r2, r3, #(PART_LEN * 2) @ &aecm->xBuf[PART_LEN]
|
||||
@ -180,7 +180,7 @@ LOOP_COPY:
|
||||
cmp r2, #0 @ Check if (nearendClean != NULL).
|
||||
beq END
|
||||
|
||||
ldr r4, =offset_aecm_dBufClean
|
||||
movw r4, #offset_aecm_dBufClean
|
||||
ldr r1, [r5, r4] @ &aecm->dBufClean[0]
|
||||
add r0, r1, #(PART_LEN * 2) @ &aecm->dBufClean[PART_LEN]
|
||||
|
||||
@ -210,8 +210,8 @@ DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
|
||||
vmov.i32 q8, #0
|
||||
vmov.i32 q9, #0
|
||||
|
||||
ldr r7, =offset_aecm_channelStored
|
||||
ldr r5, =offset_aecm_channelAdapt16
|
||||
movw r7, #offset_aecm_channelStored
|
||||
movw r5, #offset_aecm_channelAdapt16
|
||||
|
||||
mov r4, r2
|
||||
mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8.
|
||||
@ -269,8 +269,8 @@ LOOP_CALC_LINEAR_ENERGIES:
|
||||
@ int32_t* echo_est);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
|
||||
ldr r3, =offset_aecm_channelAdapt16
|
||||
ldr r12, =offset_aecm_channelStored
|
||||
movw r3, #offset_aecm_channelAdapt16
|
||||
movw r12, #offset_aecm_channelStored
|
||||
ldr r3, [r0, r3]
|
||||
ldr r0, [r0, r12]
|
||||
mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8.
|
||||
@ -296,8 +296,8 @@ LOOP_STORE_ADAPTIVE_CHANNEL:
|
||||
@ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
|
||||
ldr r1, =offset_aecm_channelAdapt16
|
||||
ldr r2, =offset_aecm_channelAdapt32
|
||||
movw r1, #offset_aecm_channelAdapt16
|
||||
movw r2, #offset_aecm_channelAdapt32
|
||||
movw r3, #offset_aecm_channelStored
|
||||
ldr r1, [r0, r1] @ &aecm->channelAdapt16[0]
|
||||
ldr r2, [r0, r2] @ &aecm->channelAdapt32[0]
|
||||
@ -321,8 +321,9 @@ LOOP_RESET_ADAPTIVE_CHANNEL:
|
||||
bx lr
|
||||
|
||||
@ Square root of Hanning window in Q14.
|
||||
.align 3
|
||||
.align 4
|
||||
WebRtcAecm_kSqrtHanning:
|
||||
_WebRtcAecm_kSqrtHanning:
|
||||
.short 0
|
||||
.short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
|
||||
.short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
|
||||
@ -335,7 +336,7 @@ WebRtcAecm_kSqrtHanning:
|
||||
|
||||
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
|
||||
@ the order was reversed and one element (0) was removed.
|
||||
.align 3
|
||||
.align 4
|
||||
kSqrtHanningReversed:
|
||||
.short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947
|
||||
.short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571
|
||||
|
@ -174,11 +174,10 @@
|
||||
'ns/nsx_core_neon.c',
|
||||
],
|
||||
'conditions': [
|
||||
['OS=="android"', {
|
||||
['OS=="android" or OS=="ios"', {
|
||||
'dependencies': [
|
||||
'audio_processing_offsets',
|
||||
],
|
||||
# TODO(kma): port this block from Android into other build systems.
|
||||
'sources': [
|
||||
'aecm/aecm_core_neon.S',
|
||||
'ns/nsx_core_neon.S',
|
||||
@ -192,7 +191,7 @@
|
||||
],
|
||||
}],
|
||||
'conditions': [
|
||||
['OS=="android"', {
|
||||
['OS=="android" or OS=="ios"', {
|
||||
'targets': [{
|
||||
'target_name': 'audio_processing_offsets',
|
||||
'type': 'none',
|
||||
|
@ -26,7 +26,9 @@ GLOBAL_LABEL WebRtcNsx_kLogTable
|
||||
GLOBAL_LABEL WebRtcNsx_kCounterDiv
|
||||
GLOBAL_LABEL WebRtcNsx_kLogTableFrac
|
||||
|
||||
.align 2
|
||||
WebRtcNsx_kLogTableFrac:
|
||||
_WebRtcNsx_kLogTableFrac:
|
||||
.short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26
|
||||
.short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50
|
||||
.short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72
|
||||
@ -45,7 +47,9 @@ WebRtcNsx_kLogTableFrac:
|
||||
.short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255
|
||||
.short 255
|
||||
|
||||
.align 2
|
||||
WebRtcNsx_kCounterDiv:
|
||||
_WebRtcNsx_kCounterDiv:
|
||||
.short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979
|
||||
.short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489
|
||||
.short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964
|
||||
@ -62,7 +66,9 @@ WebRtcNsx_kCounterDiv:
|
||||
.short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174
|
||||
.short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
|
||||
|
||||
.align 2
|
||||
WebRtcNsx_kLogTable:
|
||||
_WebRtcNsx_kLogTable:
|
||||
.short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
|
||||
|
||||
@ void NoiseEstimationNeon(NsxInst_t* inst,
|
||||
@ -82,6 +88,7 @@ WebRtcNsx_kLogTable:
|
||||
@ r11: countDiv
|
||||
@ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER
|
||||
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
|
||||
push {r4-r11, r14}
|
||||
vpush {d8-d15}
|
||||
@ -146,7 +153,8 @@ CHECK_LMAGN_COUNTER:
|
||||
ldr r7, [r0, r7]
|
||||
add r9, r0
|
||||
cmp r7, #END_STARTUP_LONG
|
||||
add r10, r0, #offset_nsx_noiseEstCounter
|
||||
movw r10, #offset_nsx_noiseEstCounter
|
||||
add r10, r0
|
||||
movge r7, #FACTOR_Q7
|
||||
movlt r7, #FACTOR_Q7_STARTUP
|
||||
mov r4, r0
|
||||
@ -307,7 +315,7 @@ UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER:
|
||||
|
||||
mov r0, r4
|
||||
mov r1, r7
|
||||
bl UpdateNoiseEstimateNeon
|
||||
CALL_FUNCTION UpdateNoiseEstimateNeon
|
||||
|
||||
POST_UPDATE_DENSITY_ESTIMATE:
|
||||
ldrh r3, [r10]
|
||||
@ -324,7 +332,7 @@ POST_UPDATE_DENSITY_ESTIMATE:
|
||||
|
||||
sub r1, r7, r6
|
||||
mov r0, r4
|
||||
bl UpdateNoiseEstimateNeon
|
||||
CALL_FUNCTION UpdateNoiseEstimateNeon
|
||||
|
||||
UPDATE_NOISE:
|
||||
movw r1, #offset_nsx_noiseEstQuantile
|
||||
@ -350,6 +358,7 @@ UPDATE_Q_NOISE:
|
||||
|
||||
@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset);
|
||||
@ Neon registers touched: q0-q3, q8-q13.
|
||||
.align 2
|
||||
DEFINE_FUNCTION UpdateNoiseEstimateNeon
|
||||
push {r4, r5, r6, r14}
|
||||
mov r5, r0
|
||||
@ -366,7 +375,7 @@ DEFINE_FUNCTION UpdateNoiseEstimateNeon
|
||||
|
||||
mov r0, r4
|
||||
mov r1, r6
|
||||
bl WebRtcSpl_MaxValueW16Neon
|
||||
CALL_FUNCTION WebRtcSpl_MaxValueW16Neon
|
||||
|
||||
sub r12, r6, #1 @ Loop counter: inst->magnLen - 1.
|
||||
|
||||
@ -418,6 +427,7 @@ POST_LOOP_MAGNLEN:
|
||||
pop {r4, r5, r6, pc}
|
||||
|
||||
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
|
||||
push {r4-r8}
|
||||
|
||||
@ -533,6 +543,7 @@ LOOP_ANALEN2:
|
||||
bx r14
|
||||
|
||||
@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon
|
||||
movw r12, #offset_nsx_normData
|
||||
movw r3, #offset_nsx_real
|
||||
@ -563,6 +574,7 @@ LOOP_ANALEN:
|
||||
@ void SynthesisUpdateNeon(NsxInst_t* inst,
|
||||
@ int16_t* out_frame,
|
||||
@ int16_t gain_factor);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcNsx_SynthesisUpdateNeon
|
||||
push {r4, r5}
|
||||
|
||||
@ -635,6 +647,7 @@ EXIT_SYNTHESISUPDATE:
|
||||
bx r14
|
||||
|
||||
@ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcNsx_AnalysisUpdateNeon
|
||||
push {r4-r6}
|
||||
|
||||
@ -693,6 +706,7 @@ POST_LOOP_WINDOW_DATA:
|
||||
bx r14
|
||||
|
||||
@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon
|
||||
movw r3, #offset_nsx_anaLen
|
||||
movw r12, #offset_nsx_normData
|
||||
|
@ -45,6 +45,15 @@ bl \name
|
||||
.endm
|
||||
#endif
|
||||
|
||||
// With llvm and clang compilers, for instructions ldrb, strh, etc.,
|
||||
// the condition code is after the width specifier. Here we define
|
||||
// only the ones that are actually used in the assembly files.
|
||||
#ifdef __llvm__
|
||||
.macro streqh reg1, reg2, num
|
||||
strheq \reg1, \reg2, \num
|
||||
.endm
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
|
||||
|
Loading…
x
Reference in New Issue
Block a user