Ported the APM assembly code from Android to iOS.

Bugs=none
Test=trybots and offline bit-exact file tests.
Review URL: https://webrtc-codereview.appspot.com/1066009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3563 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2013-02-23 04:16:59 +00:00
parent 0d8d010017
commit 2f9bd247ad
6 changed files with 85 additions and 40 deletions

View File

@ -32,6 +32,21 @@
'variables': {
'out_dir': '<(SHARED_INTERMEDIATE_DIR)/<(asm_header_dir)',
'process_outputs_as_sources': 1,
'conditions': [
# We only support Android and iOS.
['OS=="android"', {
'compiler_to_use':
'<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} <(android_toolchain)/*-gcc)',
'compiler_options': '-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
'pattern_to_detect': 'offset_',
}],
['OS=="ios"', {
'compiler_to_use': 'clang',
'compiler_options':
'-arch armv7 -I<(webrtc_root)/.. -isysroot $(SDKROOT) -S',
'pattern_to_detect': '_offset_',
}],
]
},
'rules': [
{
@ -46,10 +61,9 @@
'action': [
'python',
'<(webrtc_root)/build/generate_asm_header.py',
'--compiler=<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} '
'<(android_toolchain)/*-gcc)',
# Compiler options.
'--options=-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
'--compiler=<(compiler_to_use)',
'--options=<(compiler_options)',
'--pattern=<(pattern_to_detect)',
'--dir=<(out_dir)',
'<(RULE_INPUT_PATH)',
],

View File

@ -19,8 +19,9 @@ and writes them into header files.
"""
import os
import sys
import re
import subprocess
import sys
from optparse import OptionParser
def main(argv):
@ -44,6 +45,7 @@ def main(argv):
# Set the shell command with the compiler and options inputs.
compiler_command = (options.compiler + " " + options.options + " " +
input_filename + " -o " + interim_filename)
# Run the shell command and generate the intermediate file.
subprocess.check_call(compiler_command, shell=True)
@ -51,13 +53,19 @@ def main(argv):
out_file = open(out_filename, 'w') # The output header file.
# Generate the output header file.
for line in interim_file: # Iterate though all the lines in the input file.
while True:
line = interim_file.readline()
if not line: break
if line.startswith(options.pattern):
out_file.write('#define ')
out_file.write(line.split(':')[0]) # Write the constant name.
out_file.write(' ')
if line.find('.word') >= 0:
out_file.write(line.split('.word')[1]) # Write the constant value.
# Find name of the next constant and write to the output file.
const_name = re.sub(r'^_', '', line.split(':')[0])
out_file.write('#define %s ' % const_name)
# Find value of the constant we just found and write to the output file.
line = interim_file.readline()
const_value = filter(str.isdigit, line.split(' ')[0])
if const_value != '':
out_file.write('%s\n' % const_value)
interim_file.close()
out_file.close()

View File

@ -28,7 +28,7 @@ GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
@ const WebRtc_Word16* time_signal,
@ complex16_t* freq_signal,
@ int time_signal_scaling);
.align 2
.align 2
DEFINE_FUNCTION WebRtcAecm_WindowAndFFTNeon
push {r4, r5, r6, lr}
@ -60,12 +60,12 @@ LOOP_PART_LEN:
bgt LOOP_PART_LEN
@ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
ldr r12, =offset_aecm_real_fft
movw r12, #offset_aecm_real_fft
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
mov r2, r3 @ freq_signal
mov r4, r3
ldr r0, [r0, r12] @ aecm->real_fft
bl WebRtcSpl_RealForwardFFTNeon
CALL_FUNCTION WebRtcSpl_RealForwardFFTNeon
mov r12, #(PART_LEN * 2 / 16) @ Loop counter, unrolled by 16.
@ -85,7 +85,7 @@ LOOP_PART_LEN2:
@ complex16_t* efw,
@ WebRtc_Word16* output,
@ const WebRtc_Word16* nearendClean);
.align 2
.align 2
DEFINE_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
push {r4-r8, lr}
@ -119,15 +119,15 @@ LOOP_PRE_IFFT:
str r8, [r4, r3]
@ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
ldr r12, =offset_aecm_real_fft
movw r12, #offset_aecm_real_fft
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
sub r2, #(PART_LEN * 4) @ Get r2 back to &efw[0].
mov r4, r2 @ Keep efw in r4.
ldr r0, [r0, r12] @ aecm->real_fft
bl WebRtcSpl_RealInverseFFTNeon
CALL_FUNCTION WebRtcSpl_RealInverseFFTNeon
ldr r6, =offset_aecm_outBuf
ldr r12, =offset_aecm_dfaCleanQDomain
movw r6, #offset_aecm_outBuf
movw r12, #offset_aecm_dfaCleanQDomain
ldr r8, [r5, r6] @ &aecm->outBuf[0]
ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0]
@ -160,8 +160,8 @@ LOOP_POST_IFFT:
vst1.16 d0, [r8, :64]! @ aecm->outBuf[i]
bgt LOOP_POST_IFFT
ldr r3, =offset_aecm_xBuf
ldr r12, =offset_aecm_dBufNoisy
movw r3, #offset_aecm_xBuf
movw r12, #offset_aecm_dBufNoisy
ldr r3, [r5, r3] @ &aecm->xBuf[0]
ldr r1, [r5, r12] @ &aecm->dBufNoisy[0]
add r2, r3, #(PART_LEN * 2) @ &aecm->xBuf[PART_LEN]
@ -180,7 +180,7 @@ LOOP_COPY:
cmp r2, #0 @ Check if (nearendClean != NULL).
beq END
ldr r4, =offset_aecm_dBufClean
movw r4, #offset_aecm_dBufClean
ldr r1, [r5, r4] @ &aecm->dBufClean[0]
add r0, r1, #(PART_LEN * 2) @ &aecm->dBufClean[PART_LEN]
@ -202,7 +202,7 @@ END:
@ WebRtc_UWord32* far_energy,
@ WebRtc_UWord32* echo_energy_adapt,
@ WebRtc_UWord32* echo_energy_stored);
.align 2
.align 2
DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
push {r4-r7}
@ -210,8 +210,8 @@ DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
vmov.i32 q8, #0
vmov.i32 q9, #0
ldr r7, =offset_aecm_channelStored
ldr r5, =offset_aecm_channelAdapt16
movw r7, #offset_aecm_channelStored
movw r5, #offset_aecm_channelAdapt16
mov r4, r2
mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8.
@ -267,10 +267,10 @@ LOOP_CALC_LINEAR_ENERGIES:
@ void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore_t* aecm,
@ const uint16_t* far_spectrum,
@ int32_t* echo_est);
.align 2
.align 2
DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
ldr r3, =offset_aecm_channelAdapt16
ldr r12, =offset_aecm_channelStored
movw r3, #offset_aecm_channelAdapt16
movw r12, #offset_aecm_channelStored
ldr r3, [r0, r3]
ldr r0, [r0, r12]
mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8.
@ -294,10 +294,10 @@ LOOP_STORE_ADAPTIVE_CHANNEL:
bx lr
@ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
.align 2
.align 2
DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
ldr r1, =offset_aecm_channelAdapt16
ldr r2, =offset_aecm_channelAdapt32
movw r1, #offset_aecm_channelAdapt16
movw r2, #offset_aecm_channelAdapt32
movw r3, #offset_aecm_channelStored
ldr r1, [r0, r1] @ &aecm->channelAdapt16[0]
ldr r2, [r0, r2] @ &aecm->channelAdapt32[0]
@ -321,8 +321,9 @@ LOOP_RESET_ADAPTIVE_CHANNEL:
bx lr
@ Square root of Hanning window in Q14.
.align 3
.align 4
WebRtcAecm_kSqrtHanning:
_WebRtcAecm_kSqrtHanning:
.short 0
.short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
.short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
@ -335,7 +336,7 @@ WebRtcAecm_kSqrtHanning:
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
@ the order was reversed and one element (0) was removed.
.align 3
.align 4
kSqrtHanningReversed:
.short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947
.short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571

View File

@ -174,11 +174,10 @@
'ns/nsx_core_neon.c',
],
'conditions': [
['OS=="android"', {
['OS=="android" or OS=="ios"', {
'dependencies': [
'audio_processing_offsets',
],
# TODO(kma): port this block from Android into other build systems.
'sources': [
'aecm/aecm_core_neon.S',
'ns/nsx_core_neon.S',
@ -192,7 +191,7 @@
],
}],
'conditions': [
['OS=="android"', {
['OS=="android" or OS=="ios"', {
'targets': [{
'target_name': 'audio_processing_offsets',
'type': 'none',

View File

@ -26,7 +26,9 @@ GLOBAL_LABEL WebRtcNsx_kLogTable
GLOBAL_LABEL WebRtcNsx_kCounterDiv
GLOBAL_LABEL WebRtcNsx_kLogTableFrac
.align 2
WebRtcNsx_kLogTableFrac:
_WebRtcNsx_kLogTableFrac:
.short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26
.short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50
.short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72
@ -45,7 +47,9 @@ WebRtcNsx_kLogTableFrac:
.short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255
.short 255
.align 2
WebRtcNsx_kCounterDiv:
_WebRtcNsx_kCounterDiv:
.short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979
.short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489
.short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964
@ -62,7 +66,9 @@ WebRtcNsx_kCounterDiv:
.short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174
.short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
.align 2
WebRtcNsx_kLogTable:
_WebRtcNsx_kLogTable:
.short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
@ void NoiseEstimationNeon(NsxInst_t* inst,
@ -82,6 +88,7 @@ WebRtcNsx_kLogTable:
@ r11: countDiv
@ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER
.align 2
DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
push {r4-r11, r14}
vpush {d8-d15}
@ -146,7 +153,8 @@ CHECK_LMAGN_COUNTER:
ldr r7, [r0, r7]
add r9, r0
cmp r7, #END_STARTUP_LONG
add r10, r0, #offset_nsx_noiseEstCounter
movw r10, #offset_nsx_noiseEstCounter
add r10, r0
movge r7, #FACTOR_Q7
movlt r7, #FACTOR_Q7_STARTUP
mov r4, r0
@ -307,7 +315,7 @@ UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER:
mov r0, r4
mov r1, r7
bl UpdateNoiseEstimateNeon
CALL_FUNCTION UpdateNoiseEstimateNeon
POST_UPDATE_DENSITY_ESTIMATE:
ldrh r3, [r10]
@ -324,7 +332,7 @@ POST_UPDATE_DENSITY_ESTIMATE:
sub r1, r7, r6
mov r0, r4
bl UpdateNoiseEstimateNeon
CALL_FUNCTION UpdateNoiseEstimateNeon
UPDATE_NOISE:
movw r1, #offset_nsx_noiseEstQuantile
@ -350,6 +358,7 @@ UPDATE_Q_NOISE:
@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset);
@ Neon registers touched: q0-q3, q8-q13.
.align 2
DEFINE_FUNCTION UpdateNoiseEstimateNeon
push {r4, r5, r6, r14}
mov r5, r0
@ -366,7 +375,7 @@ DEFINE_FUNCTION UpdateNoiseEstimateNeon
mov r0, r4
mov r1, r6
bl WebRtcSpl_MaxValueW16Neon
CALL_FUNCTION WebRtcSpl_MaxValueW16Neon
sub r12, r6, #1 @ Loop counter: inst->magnLen - 1.
@ -418,6 +427,7 @@ POST_LOOP_MAGNLEN:
pop {r4, r5, r6, pc}
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
.align 2
DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
push {r4-r8}
@ -533,6 +543,7 @@ LOOP_ANALEN2:
bx r14
@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
.align 2
DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon
movw r12, #offset_nsx_normData
movw r3, #offset_nsx_real
@ -563,6 +574,7 @@ LOOP_ANALEN:
@ void SynthesisUpdateNeon(NsxInst_t* inst,
@ int16_t* out_frame,
@ int16_t gain_factor);
.align 2
DEFINE_FUNCTION WebRtcNsx_SynthesisUpdateNeon
push {r4, r5}
@ -635,6 +647,7 @@ EXIT_SYNTHESISUPDATE:
bx r14
@ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech);
.align 2
DEFINE_FUNCTION WebRtcNsx_AnalysisUpdateNeon
push {r4-r6}
@ -693,6 +706,7 @@ POST_LOOP_WINDOW_DATA:
bx r14
@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
.align 2
DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon
movw r3, #offset_nsx_anaLen
movw r12, #offset_nsx_normData

View File

@ -45,6 +45,15 @@ bl \name
.endm
#endif
// With llvm and clang compilers, for instructions ldrb, strh, etc.,
// the condition code is after the width specifier. Here we define
// only the ones that are actually used in the assembly files.
// Compatibility macro, defined only when __llvm__ is set: lets the assembly
// sources keep the `streqh` spelling (condition code before the width
// specifier) by expanding it to `strheq` (condition code after the width
// specifier). The three operands are forwarded unchanged, in the same order.
#ifdef __llvm__
.macro streqh reg1, reg2, num
strheq \reg1, \reg2, \num
.endm
#endif
.text
#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_