Ported the assembly code in APM from Android to iOS.

Bugs=none
Test=trybots, and offline file bit-exact tests.
Review URL: https://webrtc-codereview.appspot.com/1066009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3563 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2013-02-23 04:16:59 +00:00
parent 0d8d010017
commit 2f9bd247ad
6 changed files with 85 additions and 40 deletions

View File

@ -32,6 +32,21 @@
'variables': { 'variables': {
'out_dir': '<(SHARED_INTERMEDIATE_DIR)/<(asm_header_dir)', 'out_dir': '<(SHARED_INTERMEDIATE_DIR)/<(asm_header_dir)',
'process_outputs_as_sources': 1, 'process_outputs_as_sources': 1,
# Per-OS settings for the asm-header generation rule. Each branch defines
# the same three variables, which the rule's 'action' forwards to
# generate_asm_header.py as --compiler, --options and --pattern.
'conditions': [
# We only support Android and iOS.
['OS=="android"', {
# Toolchain gcc, optionally prefixed by ${ANDROID_GOMA_WRAPPER}.
'compiler_to_use':
'<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} <(android_toolchain)/*-gcc)',
# -S: stop after emitting assembly instead of producing an object file.
'compiler_options': '-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
# Prefix of the labels the generator script looks for in that assembly.
'pattern_to_detect': 'offset_',
}],
['OS=="ios"', {
'compiler_to_use': 'clang',
'compiler_options':
'-arch armv7 -I<(webrtc_root)/.. -isysroot $(SDKROOT) -S',
# Same labels as on Android but with a leading underscore.
# NOTE(review): presumably because clang prefixes symbols with '_' on
# iOS -- confirm against the generated assembly.
'pattern_to_detect': '_offset_',
}],
]
}, },
'rules': [ 'rules': [
{ {
@ -46,10 +61,9 @@
'action': [ 'action': [
'python', 'python',
'<(webrtc_root)/build/generate_asm_header.py', '<(webrtc_root)/build/generate_asm_header.py',
'--compiler=<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} ' '--compiler=<(compiler_to_use)',
'<(android_toolchain)/*-gcc)', '--options=<(compiler_options)',
# Compiler options. '--pattern=<(pattern_to_detect)',
'--options=-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
'--dir=<(out_dir)', '--dir=<(out_dir)',
'<(RULE_INPUT_PATH)', '<(RULE_INPUT_PATH)',
], ],

View File

@ -19,8 +19,9 @@ and writes them into header files.
""" """
import os import os
import sys import re
import subprocess import subprocess
import sys
from optparse import OptionParser from optparse import OptionParser
def main(argv): def main(argv):
@ -44,6 +45,7 @@ def main(argv):
# Set the shell command with the compiler and options inputs. # Set the shell command with the compiler and options inputs.
compiler_command = (options.compiler + " " + options.options + " " + compiler_command = (options.compiler + " " + options.options + " " +
input_filename + " -o " + interim_filename) input_filename + " -o " + interim_filename)
# Run the shell command and generate the intermediate file. # Run the shell command and generate the intermediate file.
subprocess.check_call(compiler_command, shell=True) subprocess.check_call(compiler_command, shell=True)
@ -51,13 +53,19 @@ def main(argv):
out_file = open(out_filename, 'w') # The output header file. out_file = open(out_filename, 'w') # The output header file.
# Generate the output header file. # Generate the output header file.
for line in interim_file: # Iterate though all the lines in the input file. while True:
line = interim_file.readline()
if not line: break
if line.startswith(options.pattern): if line.startswith(options.pattern):
out_file.write('#define ') # Find name of the next constant and write to the output file.
out_file.write(line.split(':')[0]) # Write the constant name. const_name = re.sub(r'^_', '', line.split(':')[0])
out_file.write(' ') out_file.write('#define %s ' % const_name)
if line.find('.word') >= 0:
out_file.write(line.split('.word')[1]) # Write the constant value. # Find value of the constant we just found and write to the output file.
line = interim_file.readline()
const_value = filter(str.isdigit, line.split(' ')[0])
if const_value != '':
out_file.write('%s\n' % const_value)
interim_file.close() interim_file.close()
out_file.close() out_file.close()

View File

@ -60,12 +60,12 @@ LOOP_PART_LEN:
bgt LOOP_PART_LEN bgt LOOP_PART_LEN
@ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); @ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
ldr r12, =offset_aecm_real_fft movw r12, #offset_aecm_real_fft
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0]. sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
mov r2, r3 @ freq_signal mov r2, r3 @ freq_signal
mov r4, r3 mov r4, r3
ldr r0, [r0, r12] @ aecm->real_fft ldr r0, [r0, r12] @ aecm->real_fft
bl WebRtcSpl_RealForwardFFTNeon CALL_FUNCTION WebRtcSpl_RealForwardFFTNeon
mov r12, #(PART_LEN * 2 / 16) @ Loop counter, unrolled by 16. mov r12, #(PART_LEN * 2 / 16) @ Loop counter, unrolled by 16.
@ -119,15 +119,15 @@ LOOP_PRE_IFFT:
str r8, [r4, r3] str r8, [r4, r3]
@ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw); @ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
ldr r12, =offset_aecm_real_fft movw r12, #offset_aecm_real_fft
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0]. sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
sub r2, #(PART_LEN * 4) @ Get r2 back to &efw[0]. sub r2, #(PART_LEN * 4) @ Get r2 back to &efw[0].
mov r4, r2 @ Keep efw in r4. mov r4, r2 @ Keep efw in r4.
ldr r0, [r0, r12] @ aecm->real_fft ldr r0, [r0, r12] @ aecm->real_fft
bl WebRtcSpl_RealInverseFFTNeon CALL_FUNCTION WebRtcSpl_RealInverseFFTNeon
ldr r6, =offset_aecm_outBuf movw r6, #offset_aecm_outBuf
ldr r12, =offset_aecm_dfaCleanQDomain movw r12, #offset_aecm_dfaCleanQDomain
ldr r8, [r5, r6] @ &aecm->outBuf[0] ldr r8, [r5, r6] @ &aecm->outBuf[0]
ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0] ldrsh r2, [r5, r12] @ &aecm->dfaCleanQDomain[0]
@ -160,8 +160,8 @@ LOOP_POST_IFFT:
vst1.16 d0, [r8, :64]! @ aecm->outBuf[i] vst1.16 d0, [r8, :64]! @ aecm->outBuf[i]
bgt LOOP_POST_IFFT bgt LOOP_POST_IFFT
ldr r3, =offset_aecm_xBuf movw r3, #offset_aecm_xBuf
ldr r12, =offset_aecm_dBufNoisy movw r12, #offset_aecm_dBufNoisy
ldr r3, [r5, r3] @ &aecm->xBuf[0] ldr r3, [r5, r3] @ &aecm->xBuf[0]
ldr r1, [r5, r12] @ &aecm->dBufNoisy[0] ldr r1, [r5, r12] @ &aecm->dBufNoisy[0]
add r2, r3, #(PART_LEN * 2) @ &aecm->xBuf[PART_LEN] add r2, r3, #(PART_LEN * 2) @ &aecm->xBuf[PART_LEN]
@ -180,7 +180,7 @@ LOOP_COPY:
cmp r2, #0 @ Check if (nearendClean != NULL). cmp r2, #0 @ Check if (nearendClean != NULL).
beq END beq END
ldr r4, =offset_aecm_dBufClean movw r4, #offset_aecm_dBufClean
ldr r1, [r5, r4] @ &aecm->dBufClean[0] ldr r1, [r5, r4] @ &aecm->dBufClean[0]
add r0, r1, #(PART_LEN * 2) @ &aecm->dBufClean[PART_LEN] add r0, r1, #(PART_LEN * 2) @ &aecm->dBufClean[PART_LEN]
@ -210,8 +210,8 @@ DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
vmov.i32 q8, #0 vmov.i32 q8, #0
vmov.i32 q9, #0 vmov.i32 q9, #0
ldr r7, =offset_aecm_channelStored movw r7, #offset_aecm_channelStored
ldr r5, =offset_aecm_channelAdapt16 movw r5, #offset_aecm_channelAdapt16
mov r4, r2 mov r4, r2
mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8. mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8.
@ -269,8 +269,8 @@ LOOP_CALC_LINEAR_ENERGIES:
@ int32_t* echo_est); @ int32_t* echo_est);
.align 2 .align 2
DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
ldr r3, =offset_aecm_channelAdapt16 movw r3, #offset_aecm_channelAdapt16
ldr r12, =offset_aecm_channelStored movw r12, #offset_aecm_channelStored
ldr r3, [r0, r3] ldr r3, [r0, r3]
ldr r0, [r0, r12] ldr r0, [r0, r12]
mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8. mov r12, #(PART_LEN / 8) @ Loop counter, unrolled by 8.
@ -296,8 +296,8 @@ LOOP_STORE_ADAPTIVE_CHANNEL:
@ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm); @ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
.align 2 .align 2
DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
ldr r1, =offset_aecm_channelAdapt16 movw r1, #offset_aecm_channelAdapt16
ldr r2, =offset_aecm_channelAdapt32 movw r2, #offset_aecm_channelAdapt32
movw r3, #offset_aecm_channelStored movw r3, #offset_aecm_channelStored
ldr r1, [r0, r1] @ &aecm->channelAdapt16[0] ldr r1, [r0, r1] @ &aecm->channelAdapt16[0]
ldr r2, [r0, r2] @ &aecm->channelAdapt32[0] ldr r2, [r0, r2] @ &aecm->channelAdapt32[0]
@ -321,8 +321,9 @@ LOOP_RESET_ADAPTIVE_CHANNEL:
bx lr bx lr
@ Square root of Hanning window in Q14. @ Square root of Hanning window in Q14.
.align 3 .align 4
WebRtcAecm_kSqrtHanning: WebRtcAecm_kSqrtHanning:
_WebRtcAecm_kSqrtHanning:
.short 0 .short 0
.short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172 .short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
.short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224 .short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
@ -335,7 +336,7 @@ WebRtcAecm_kSqrtHanning:
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning, @ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
@ the order was reversed and one element (0) was removed. @ the order was reversed and one element (0) was removed.
.align 3 .align 4
kSqrtHanningReversed: kSqrtHanningReversed:
.short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947 .short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947
.short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571 .short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571

View File

@ -174,11 +174,10 @@
'ns/nsx_core_neon.c', 'ns/nsx_core_neon.c',
], ],
'conditions': [ 'conditions': [
['OS=="android"', { ['OS=="android" or OS=="ios"', {
'dependencies': [ 'dependencies': [
'audio_processing_offsets', 'audio_processing_offsets',
], ],
# TODO(kma): port this block from Android into other build systems.
'sources': [ 'sources': [
'aecm/aecm_core_neon.S', 'aecm/aecm_core_neon.S',
'ns/nsx_core_neon.S', 'ns/nsx_core_neon.S',
@ -192,7 +191,7 @@
], ],
}], }],
'conditions': [ 'conditions': [
['OS=="android"', { ['OS=="android" or OS=="ios"', {
'targets': [{ 'targets': [{
'target_name': 'audio_processing_offsets', 'target_name': 'audio_processing_offsets',
'type': 'none', 'type': 'none',

View File

@ -26,7 +26,9 @@ GLOBAL_LABEL WebRtcNsx_kLogTable
GLOBAL_LABEL WebRtcNsx_kCounterDiv GLOBAL_LABEL WebRtcNsx_kCounterDiv
GLOBAL_LABEL WebRtcNsx_kLogTableFrac GLOBAL_LABEL WebRtcNsx_kLogTableFrac
.align 2
WebRtcNsx_kLogTableFrac: WebRtcNsx_kLogTableFrac:
_WebRtcNsx_kLogTableFrac:
.short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26 .short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26
.short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50 .short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50
.short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72 .short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72
@ -45,7 +47,9 @@ WebRtcNsx_kLogTableFrac:
.short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255 .short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255
.short 255 .short 255
.align 2
WebRtcNsx_kCounterDiv: WebRtcNsx_kCounterDiv:
_WebRtcNsx_kCounterDiv:
.short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979 .short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979
.short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489 .short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489
.short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964 .short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964
@ -62,7 +66,9 @@ WebRtcNsx_kCounterDiv:
.short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174 .short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174
.short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 .short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
.align 2
WebRtcNsx_kLogTable: WebRtcNsx_kLogTable:
_WebRtcNsx_kLogTable:
.short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 .short 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
@ void NoiseEstimationNeon(NsxInst_t* inst, @ void NoiseEstimationNeon(NsxInst_t* inst,
@ -82,6 +88,7 @@ WebRtcNsx_kLogTable:
@ r11: countDiv @ r11: countDiv
@ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER @ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER
.align 2
DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
push {r4-r11, r14} push {r4-r11, r14}
vpush {d8-d15} vpush {d8-d15}
@ -146,7 +153,8 @@ CHECK_LMAGN_COUNTER:
ldr r7, [r0, r7] ldr r7, [r0, r7]
add r9, r0 add r9, r0
cmp r7, #END_STARTUP_LONG cmp r7, #END_STARTUP_LONG
add r10, r0, #offset_nsx_noiseEstCounter movw r10, #offset_nsx_noiseEstCounter
add r10, r0
movge r7, #FACTOR_Q7 movge r7, #FACTOR_Q7
movlt r7, #FACTOR_Q7_STARTUP movlt r7, #FACTOR_Q7_STARTUP
mov r4, r0 mov r4, r0
@ -307,7 +315,7 @@ UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER:
mov r0, r4 mov r0, r4
mov r1, r7 mov r1, r7
bl UpdateNoiseEstimateNeon CALL_FUNCTION UpdateNoiseEstimateNeon
POST_UPDATE_DENSITY_ESTIMATE: POST_UPDATE_DENSITY_ESTIMATE:
ldrh r3, [r10] ldrh r3, [r10]
@ -324,7 +332,7 @@ POST_UPDATE_DENSITY_ESTIMATE:
sub r1, r7, r6 sub r1, r7, r6
mov r0, r4 mov r0, r4
bl UpdateNoiseEstimateNeon CALL_FUNCTION UpdateNoiseEstimateNeon
UPDATE_NOISE: UPDATE_NOISE:
movw r1, #offset_nsx_noiseEstQuantile movw r1, #offset_nsx_noiseEstQuantile
@ -350,6 +358,7 @@ UPDATE_Q_NOISE:
@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset); @ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset);
@ Neon registers touched: q0-q3, q8-q13. @ Neon registers touched: q0-q3, q8-q13.
.align 2
DEFINE_FUNCTION UpdateNoiseEstimateNeon DEFINE_FUNCTION UpdateNoiseEstimateNeon
push {r4, r5, r6, r14} push {r4, r5, r6, r14}
mov r5, r0 mov r5, r0
@ -366,7 +375,7 @@ DEFINE_FUNCTION UpdateNoiseEstimateNeon
mov r0, r4 mov r0, r4
mov r1, r6 mov r1, r6
bl WebRtcSpl_MaxValueW16Neon CALL_FUNCTION WebRtcSpl_MaxValueW16Neon
sub r12, r6, #1 @ Loop counter: inst->magnLen - 1. sub r12, r6, #1 @ Loop counter: inst->magnLen - 1.
@ -418,6 +427,7 @@ POST_LOOP_MAGNLEN:
pop {r4, r5, r6, pc} pop {r4, r5, r6, pc}
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf); @ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
.align 2
DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
push {r4-r8} push {r4-r8}
@ -533,6 +543,7 @@ LOOP_ANALEN2:
bx r14 bx r14
@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor); @ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
.align 2
DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon
movw r12, #offset_nsx_normData movw r12, #offset_nsx_normData
movw r3, #offset_nsx_real movw r3, #offset_nsx_real
@ -563,6 +574,7 @@ LOOP_ANALEN:
@ void SynthesisUpdateNeon(NsxInst_t* inst, @ void SynthesisUpdateNeon(NsxInst_t* inst,
@ int16_t* out_frame, @ int16_t* out_frame,
@ int16_t gain_factor); @ int16_t gain_factor);
.align 2
DEFINE_FUNCTION WebRtcNsx_SynthesisUpdateNeon DEFINE_FUNCTION WebRtcNsx_SynthesisUpdateNeon
push {r4, r5} push {r4, r5}
@ -635,6 +647,7 @@ EXIT_SYNTHESISUPDATE:
bx r14 bx r14
@ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech); @ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech);
.align 2
DEFINE_FUNCTION WebRtcNsx_AnalysisUpdateNeon DEFINE_FUNCTION WebRtcNsx_AnalysisUpdateNeon
push {r4-r6} push {r4-r6}
@ -693,6 +706,7 @@ POST_LOOP_WINDOW_DATA:
bx r14 bx r14
@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out); @ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
.align 2
DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon
movw r3, #offset_nsx_anaLen movw r3, #offset_nsx_anaLen
movw r12, #offset_nsx_normData movw r12, #offset_nsx_normData

View File

@ -45,6 +45,15 @@ bl \name
.endm .endm
#endif #endif
// With llvm and clang compilers, for instructions ldrb, strh, etc.,
// the condition code is after the width specifier. Here we define
// only the ones that are actually used in the assembly files.
#ifdef __llvm__
// streqh: compatibility macro, defined only when __llvm__ is set. It lets
// the assembly sources keep the "streqh" spelling and re-emits the
// instruction as "strheq" with the same three operands, in the same order.
.macro streqh reg1, reg2, num
strheq \reg1, \reg2, \num
.endm
#endif
.text .text
#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_ #endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_