Ported the assembly code in APM (the audio processing module) from Android to iOS.

Bugs=none Test=trybots, and offline file bit-exact tests. Review URL: https://webrtc-codereview.appspot.com/1066009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3563 4adac7df-926f-26a2-2b94-8c16560cd09d
2013-02-23 04:16:59 +00:00 · 2013-02-23 04:16:59 +00:00 · 2f9bd247ad
commit 2f9bd247ad
parent 0d8d010017
6 changed files with 85 additions and 40 deletions
--- a/webrtc/build/generate_asm_header.gypi
+++ b/webrtc/build/generate_asm_header.gypi
@ -32,6 +32,21 @@
  'variables': {
    'out_dir': '<(SHARED_INTERMEDIATE_DIR)/<(asm_header_dir)',
    'process_outputs_as_sources': 1,
    # Per-OS settings for the generate_asm_header.py action in this file:
    #   compiler_to_use   -> passed as --compiler
    #   compiler_options  -> passed as --options
    #   pattern_to_detect -> passed as --pattern; the script treats lines of
    #                        the compiler output that start with this prefix
    #                        as constant definitions.
    'conditions': [
      # We only support Android and iOS.
      ['OS=="android"', {
        'compiler_to_use':
          '<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} <(android_toolchain)/*-gcc)',
        'compiler_options': '-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
        'pattern_to_detect': 'offset_',
      }],
      ['OS=="ios"', {
        'compiler_to_use': 'clang',
        'compiler_options':
          '-arch armv7 -I<(webrtc_root)/.. -isysroot $(SDKROOT) -S',
        # NOTE(review): the leading '_' presumably reflects the symbol prefix
        # emitted by the iOS toolchain -- confirm. The script strips one
        # leading underscore from the name before writing the #define.
        'pattern_to_detect': '_offset_',
      }],
    ]
  },
  'rules': [
    {
@ -46,10 +61,9 @@
      'action': [
        'python',
        '<(webrtc_root)/build/generate_asm_header.py',
-        '--compiler=<!(/bin/echo -n ${ANDROID_GOMA_WRAPPER} '
+        '--compiler=<(compiler_to_use)',
-          '<(android_toolchain)/*-gcc)',
+        '--options=<(compiler_options)',
-        # Compiler options.
+        '--pattern=<(pattern_to_detect)',
        '--options=-I<(webrtc_root)/.. -I<@(android_ndk_include) -S',
        '--dir=<(out_dir)',
        '<(RULE_INPUT_PATH)',
      ],
--- a/webrtc/build/generate_asm_header.py
+++ b/webrtc/build/generate_asm_header.py
@ -19,8 +19,9 @@ and writes them into header files.
 """
 import os
-import sys
+import re
 import subprocess
 import sys
 from optparse import OptionParser
 def main(argv):
@ -44,6 +45,7 @@ def main(argv):
  # Set the shell command with the compiler and options inputs.
  compiler_command = (options.compiler + " " + options.options + " " +
      input_filename + " -o " + interim_filename)
  # Run the shell command and generate the intermediate file.
  subprocess.check_call(compiler_command, shell=True)
@ -51,13 +53,19 @@ def main(argv):
  out_file = open(out_filename, 'w')  # The output header file.
  # Generate the output header file.
-  for line in interim_file:  # Iterate through all the lines in the input file.
+  while True:
    line = interim_file.readline()
    if not line: break
    if line.startswith(options.pattern):
-      out_file.write('#define ')
+      # Find name of the next constant and write to the output file.
-      out_file.write(line.split(':')[0])  # Write the constant name.
+      const_name = re.sub(r'^_', '', line.split(':')[0])
-      out_file.write(' ')
+      out_file.write('#define %s ' % const_name)
-    if line.find('.word') >= 0:
+
-      out_file.write(line.split('.word')[1])  # Write the constant value.
+      # Find value of the constant we just found and write to the output file.
      line = interim_file.readline()
      const_value = filter(str.isdigit, line.split(' ')[0])
      if const_value != '':
        out_file.write('%s\n' % const_value)
  interim_file.close()
  out_file.close()
--- a/webrtc/modules/audio_processing/aecm/aecm_core_neon.S
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_neon.S
@ -60,12 +60,12 @@ LOOP_PART_LEN:
  bgt LOOP_PART_LEN
  @ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
-  ldr r12, =offset_aecm_real_fft
+  movw r12, #offset_aecm_real_fft
  sub r1, #(PART_LEN * 4)                    @ Get r1 back to &fft[0].
  mov r2, r3                                 @ freq_signal
  mov r4, r3
  ldr r0, [r0, r12]                          @ aecm->real_fft
-  bl  WebRtcSpl_RealForwardFFTNeon
+  CALL_FUNCTION WebRtcSpl_RealForwardFFTNeon
  mov r12, #(PART_LEN * 2 / 16)              @ Loop counter, unrolled by 16.
@ -119,15 +119,15 @@ LOOP_PRE_IFFT:
  str r8, [r4, r3]
  @ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
-  ldr r12, =offset_aecm_real_fft
+  movw r12, #offset_aecm_real_fft
  sub r1, #(PART_LEN * 4)                    @ Get r1 back to &fft[0].
  sub r2, #(PART_LEN * 4)                    @ Get r2 back to &efw[0].
  mov r4, r2                                 @ Keep efw in r4.
  ldr r0, [r0, r12]                          @ aecm->real_fft
-  bl  WebRtcSpl_RealInverseFFTNeon
+  CALL_FUNCTION WebRtcSpl_RealInverseFFTNeon
-  ldr r6, =offset_aecm_outBuf
+  movw r6, #offset_aecm_outBuf
-  ldr r12, =offset_aecm_dfaCleanQDomain
+  movw r12, #offset_aecm_dfaCleanQDomain
  ldr r8, [r5, r6]                           @ &aecm->outBuf[0]
  ldrsh r2, [r5, r12]                        @ aecm->dfaCleanQDomain
@ -160,8 +160,8 @@ LOOP_POST_IFFT:
  vst1.16 d0, [r8, :64]!                     @ aecm->outBuf[i]
  bgt LOOP_POST_IFFT
-  ldr r3, =offset_aecm_xBuf
+  movw r3, #offset_aecm_xBuf
-  ldr r12, =offset_aecm_dBufNoisy
+  movw r12, #offset_aecm_dBufNoisy
  ldr r3, [r5, r3]                           @ &aecm->xBuf[0]
  ldr r1, [r5, r12]                          @ &aecm->dBufNoisy[0]
  add r2, r3, #(PART_LEN * 2)                @ &aecm->xBuf[PART_LEN]
@ -180,7 +180,7 @@ LOOP_COPY:
  cmp r2, #0                                  @ Check if (nearendClean != NULL).
  beq END
-  ldr r4, =offset_aecm_dBufClean
+  movw r4, #offset_aecm_dBufClean
  ldr r1, [r5, r4]                            @ &aecm->dBufClean[0]
  add r0, r1, #(PART_LEN * 2)                 @ &aecm->dBufClean[PART_LEN]
@ -210,8 +210,8 @@ DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
  vmov.i32 q8,  #0
  vmov.i32 q9,  #0
-  ldr r7, =offset_aecm_channelStored
+  movw r7, #offset_aecm_channelStored
-  ldr r5, =offset_aecm_channelAdapt16
+  movw r5, #offset_aecm_channelAdapt16
  mov r4, r2
  mov r12, #(PART_LEN / 8)                   @  Loop counter, unrolled by 8.
@ -269,8 +269,8 @@ LOOP_CALC_LINEAR_ENERGIES:
@                                          int32_t* echo_est);
 .align 2
 DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
-  ldr r3, =offset_aecm_channelAdapt16
+  movw r3, #offset_aecm_channelAdapt16
-  ldr r12, =offset_aecm_channelStored
+  movw r12, #offset_aecm_channelStored
  ldr r3, [r0, r3]
  ldr r0, [r0, r12]
  mov r12, #(PART_LEN / 8)                   @ Loop counter, unrolled by 8.
@ -296,8 +296,8 @@ LOOP_STORE_ADAPTIVE_CHANNEL:
@ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
 .align 2
 DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
-  ldr r1, =offset_aecm_channelAdapt16
+  movw r1, #offset_aecm_channelAdapt16
-  ldr r2, =offset_aecm_channelAdapt32
+  movw r2, #offset_aecm_channelAdapt32
  movw r3, #offset_aecm_channelStored
  ldr r1, [r0, r1]                           @ &aecm->channelAdapt16[0]
  ldr r2, [r0, r2]                           @ &aecm->channelAdapt32[0]
@ -321,8 +321,9 @@ LOOP_RESET_ADAPTIVE_CHANNEL:
  bx  lr
@ Square root of Hanning window in Q14.
-.align 3
+.align 4
 WebRtcAecm_kSqrtHanning:
 _WebRtcAecm_kSqrtHanning:
  .short 0
  .short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
  .short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
@ -335,7 +336,7 @@ WebRtcAecm_kSqrtHanning:
@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
@ the order was reversed and one element (0) was removed.
-.align  3
+.align 4
 kSqrtHanningReversed:
  .short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947
  .short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@ -174,11 +174,10 @@
          'ns/nsx_core_neon.c',
        ],
        'conditions': [
-          ['OS=="android"', {
+          ['OS=="android" or OS=="ios"', {
            'dependencies': [
              'audio_processing_offsets',
            ],
            # TODO(kma): port this block from Android into other build systems.
            'sources': [
              'aecm/aecm_core_neon.S',
              'ns/nsx_core_neon.S',
@ -192,7 +191,7 @@
        ],
      }],
      'conditions': [
-        ['OS=="android"', {
+        ['OS=="android" or OS=="ios"', {
          'targets': [{
            'target_name': 'audio_processing_offsets',
            'type': 'none',
--- a/webrtc/modules/audio_processing/ns/nsx_core_neon.S
+++ b/webrtc/modules/audio_processing/ns/nsx_core_neon.S
@ -26,7 +26,9 @@ GLOBAL_LABEL WebRtcNsx_kLogTable
 GLOBAL_LABEL WebRtcNsx_kCounterDiv
 GLOBAL_LABEL WebRtcNsx_kLogTableFrac
 .align 2
 WebRtcNsx_kLogTableFrac:
 _WebRtcNsx_kLogTableFrac:
 .short 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26
 .short 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50
 .short 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72
@ -45,7 +47,9 @@ WebRtcNsx_kLogTableFrac:
 .short 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255
 .short 255
 .align 2
 WebRtcNsx_kCounterDiv:
 _WebRtcNsx_kCounterDiv:
 .short 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979
 .short 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489
 .short 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964
@ -62,7 +66,9 @@ WebRtcNsx_kCounterDiv:
 .short 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174
 .short 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
 .align 2
 WebRtcNsx_kLogTable:
 _WebRtcNsx_kLogTable:
 .short  0, 177, 355, 532, 710, 887, 1065, 1242, 1420
@ void NoiseEstimationNeon(NsxInst_t* inst,
@ -82,6 +88,7 @@ WebRtcNsx_kLogTable:
@ r11: countDiv
@ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER
 .align 2
 DEFINE_FUNCTION WebRtcNsx_NoiseEstimationNeon
  push {r4-r11, r14}
  vpush {d8-d15}
@ -146,7 +153,8 @@ CHECK_LMAGN_COUNTER:
  ldr r7, [r0, r7]
  add r9, r0
  cmp r7, #END_STARTUP_LONG
-  add r10, r0, #offset_nsx_noiseEstCounter
+  movw r10, #offset_nsx_noiseEstCounter
  add r10, r0
  movge r7, #FACTOR_Q7
  movlt r7, #FACTOR_Q7_STARTUP
  mov r4, r0
@ -307,7 +315,7 @@ UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER:
  mov r0, r4
  mov r1, r7
-  bl UpdateNoiseEstimateNeon
+  CALL_FUNCTION UpdateNoiseEstimateNeon
 POST_UPDATE_DENSITY_ESTIMATE:
  ldrh r3, [r10]
@ -324,7 +332,7 @@ POST_UPDATE_DENSITY_ESTIMATE:
  sub r1, r7, r6
  mov r0, r4
-  bl UpdateNoiseEstimateNeon
+  CALL_FUNCTION UpdateNoiseEstimateNeon
 UPDATE_NOISE:
  movw r1, #offset_nsx_noiseEstQuantile
@ -350,6 +358,7 @@ UPDATE_Q_NOISE:
@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset);
@ Neon registers touched: q0-q3, q8-q13.
 .align 2
 DEFINE_FUNCTION UpdateNoiseEstimateNeon
  push {r4, r5, r6, r14}
  mov r5, r0
@ -366,7 +375,7 @@ DEFINE_FUNCTION UpdateNoiseEstimateNeon
  mov r0, r4
  mov r1, r6
-  bl WebRtcSpl_MaxValueW16Neon
+  CALL_FUNCTION WebRtcSpl_MaxValueW16Neon
  sub r12, r6, #1             @ Loop counter: inst->magnLen - 1.
@ -418,6 +427,7 @@ POST_LOOP_MAGNLEN:
  pop {r4, r5, r6, pc}
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
 .align 2
 DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
  push {r4-r8}
@ -533,6 +543,7 @@ LOOP_ANALEN2:
  bx r14
@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
 .align 2
 DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon
  movw r12, #offset_nsx_normData
  movw r3, #offset_nsx_real
@ -563,6 +574,7 @@ LOOP_ANALEN:
@ void SynthesisUpdateNeon(NsxInst_t* inst,
@                          int16_t* out_frame,
@                          int16_t gain_factor);
 .align 2
 DEFINE_FUNCTION WebRtcNsx_SynthesisUpdateNeon
  push {r4, r5}
@ -635,6 +647,7 @@ EXIT_SYNTHESISUPDATE:
  bx r14
@ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech);
 .align 2
 DEFINE_FUNCTION WebRtcNsx_AnalysisUpdateNeon
  push {r4-r6}
@ -693,6 +706,7 @@ POST_LOOP_WINDOW_DATA:
  bx r14
@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
 .align 2
 DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon
  movw r3, #offset_nsx_anaLen
  movw r12, #offset_nsx_normData
--- a/webrtc/system_wrappers/interface/asm_defines.h
+++ b/webrtc/system_wrappers/interface/asm_defines.h
@ -45,6 +45,15 @@ bl \name
 .endm
 #endif
 // With llvm and clang compilers, for instructions ldrb, strh, etc.,
 // the condition code is after the width specifier. Here we define
 // only the ones that are actually used in the assembly files.
 #ifdef __llvm__
 // streqh: lets sources keep the "condition before width" spelling; it
 // expands to strheq (condition after width) with the same three operands.
 .macro streqh reg1, reg2, num
 strheq \reg1, \reg2, \num
 .endm
 #endif
 .text
 #endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ASM_DEFINES_H_