Assembly coding for pitch filter in iSAC for ARMv6.

Review URL: https://webrtc-codereview.appspot.com/631004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2501 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-07-10 19:30:57 +00:00
parent e2c16a83bc
commit adf8ddf4aa
4 changed files with 182 additions and 16 deletions
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk
+++ b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk
@@ -1,4 +1,4 @@
-# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 #
 # Use of this source code is governed by a BSD-style license
 # that can be found in the LICENSE file in the root of the source
@@ -46,8 +46,10 @@ LOCAL_SRC_FILES := \
    transform.c

 ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
+# The .S (instead of .s) extension is used so that a C header file can be included in assembly.
 LOCAL_SRC_FILES += \
-    lattice_armv7.S
+    lattice_armv7.S \
+    pitchfilter_armv6.S
 else
 LOCAL_SRC_FILES += \
    lattice_c.c
@@ -84,7 +86,7 @@ LOCAL_MODULE := libwebrtc_isacfix_neon
 LOCAL_MODULE_TAGS := optional
 LOCAL_SRC_FILES := \
    filters_neon.c \
-    lattice_neon.S #.S extention is for including a header file in assembly.
+    lattice_neon.S

 # Flags passed to both C and C++ files.
 LOCAL_CFLAGS := \
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.h
+++ b/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.h
@@ -42,6 +42,17 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16 *indatFix,
                               WebRtc_Word16 *gainsQ12,
                               WebRtc_Word16 type);

+void WebRtcIsacfix_PitchFilterCore(int loopNumber,  // Iterations to run.
+                                   WebRtc_Word16 gain,  // Scales the fractional-pitch output.
+                                   int index,  // Offset positioning the read pointer in outputBuf2.
+                                   WebRtc_Word16 sign,  // Multiplies the filtered value before subtraction.
+                                   WebRtc_Word16* inputState,  // Low-pass filter state, shifted each iteration.
+                                   WebRtc_Word16* outputBuf2,  // Read at the index-derived offset, written at *index2 + PITCH_BUFFSIZE.
+                                   const WebRtc_Word16* coefficient,  // Fractional-pitch filter taps.
+                                   WebRtc_Word16* inputBuf,  // Read at *index2.
+                                   WebRtc_Word16* outputBuf,  // Written at *index2.
+                                   int* index2);  // In/out position; advanced once per iteration.
+
 void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0,
                                    PitchFiltstr *pfp,
                                    WebRtc_Word16 *lagsQ7,
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_filter.c
+++ b/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_filter.c
@@ -55,18 +55,19 @@ static __inline WebRtc_Word32 CalcLrIntQ(WebRtc_Word32 fixVal,
  return intgr;
 }

+#ifndef WEBRTC_ARCH_ARM_V7A
 // Pitch filtering.
 // TODO(Turaj): Add descriptions of input and output parameters.
-static void PitchFilter(int loopNumber,
-                        WebRtc_Word16 gain,
-                        int index,
-                        WebRtc_Word16 sign,
-                        WebRtc_Word16* inputState,
-                        WebRtc_Word16* outputBuf2,
-                        const WebRtc_Word16* coefficient,
-                        WebRtc_Word16* inputBuf,
-                        WebRtc_Word16* outputBuf,
-                        int* index2) {
+void WebRtcIsacfix_PitchFilterCore(int loopNumber,
+                                   WebRtc_Word16 gain,
+                                   int index,
+                                   WebRtc_Word16 sign,
+                                   WebRtc_Word16* inputState,
+                                   WebRtc_Word16* outputBuf2,
+                                   const WebRtc_Word16* coefficient,
+                                   WebRtc_Word16* inputBuf,
+                                   WebRtc_Word16* outputBuf,
+                                   int* index2) {
  int i = 0, j = 0;  // Loop counters.
  WebRtc_Word16* ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)];
  WebRtc_Word16 tmpW16 = 0;
@@ -112,6 +113,11 @@ static void PitchFilter(int loopNumber,
    (*index2)++;
  }
 }
+#else
+// These two conditions are assumed by the ARM assembly file.
+WEBRTC_STATIC_ASSERT(PITCH_FRACORDER, PITCH_FRACORDER == 9);
+WEBRTC_STATIC_ASSERT(PITCH_DAMPORDER, PITCH_DAMPORDER == 5);
+#endif

 void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
                                                       // Q0 if type is 2.
@@ -192,8 +198,8 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
      fracoeffQQ = kIntrpCoef[frcQQ];

      // Pitch filtering.
-      PitchFilter(PITCH_SUBFRAME_LEN / kSegments, curGainQ12, indW32, sign,
-          inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
+      WebRtcIsacfix_PitchFilterCore(PITCH_SUBFRAME_LEN / kSegments, curGainQ12,
+        indW32, sign, inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
    }
  }

@@ -206,7 +212,7 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,

  if (type == 2) {
    // Filter look-ahead segment.
-    PitchFilter(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
+    WebRtcIsacfix_PitchFilterCore(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
                ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
  }
 }
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/pitchfilter_armv6.S
+++ b/src/modules/audio_coding/codecs/iSAC/fix/source/pitchfilter_armv6.S
@@ -0,0 +1,147 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ Contains the core loop routine for the pitch filter function in iSAC,
+@ optimized for ARMv7 platforms (assembled with .arch armv6 instructions).
+@
+@ Output is bit-exact with the reference C code in pitch_filter.c.
+
+#include "settings.h"
+
+.arch armv6
+.align  2
+.global WebRtcIsacfix_PitchFilterCore
+
+
+@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
+@                                    WebRtc_Word16 gain,
+@                                    int index,
+@                                    WebRtc_Word16 sign,
+@                                    WebRtc_Word16* inputState,
+@                                    WebRtc_Word16* outputBuf2,
+@                                    const WebRtc_Word16* coefficient,
+@                                    WebRtc_Word16* inputBuf,
+@                                    WebRtc_Word16* outputBuf,
+@                                    int* index2) {
+
+WebRtcIsacfix_PitchFilterCore:
+.fnstart
+  push {r4-r11}               @ Save r4-r11 (32 bytes); restored before return.
+  sub sp, #8                  @ Reserve 8 bytes of scratch for loopNumber and sign.
+
+  str r0, [sp]                @ loopNumber (r0 itself is the loop counter)
+  str r3, [sp, #4]            @ sign
+  ldr r3, [sp, #44]           @ outputBuf2 (stack arguments start at sp + 40)
+  ldr r6, [sp, #60]           @ index2
+  ldr r7, [r6]                @ *index2
+  ldr r8, [sp, #52]           @ inputBuf
+  ldr r12, [sp, #56]          @ outputBuf
+
+  add r4, r7, r0              @ *index2 + loopNumber
+  str r4, [r6]                @ Store return value to index2.
+
+  mov r10, r7, asl #1         @ Byte offset of *index2 (16-bit elements).
+  add r12, r10                @ &outputBuf[*index2]
+  add r8, r10                 @ &inputBuf[*index2]
+
+  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
+  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
+  sub r4, r2                  @ r2: index
+  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
+  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
+  ldr r9, [sp, #48]           @ coefficient
+
+LOOP:
+@ Usage of registers in the loop:
+@  r0: loop counter
+@  r1: gain
+@  r2: tmpW32
+@  r3: &ubufQQpos2[]
+@  r6: &outputBuf2[]
+@  r8: &inputBuf[]
+@  r9: &coefficient[]
+@  r12: &outputBuf[]
+@  r4, r5, r7, r10, r11: scratch
+
+  @ Filter to get fractional pitch.
+  @ The pitch filter loop here is unrolled with 9 multiplications.
+  pld [r3]
+  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
+  ldr r4, [r9], #4            @ coefficient[0, 1]
+  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
+  ldr r5, [r9], #4            @ coefficient[2, 3]
+  smuad r2, r10, r4           @ tmpW32 = taps 0 and 1 (dual 16x16 multiply-add).
+  smlad r2, r11, r5, r2       @ tmpW32 += taps 2 and 3.
+
+  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
+  ldr r4, [r9], #4            @ coefficient[4, 5]
+  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
+  ldr r5, [r9], #4            @ coefficient[6, 7]
+  smlad r2, r10, r4, r2       @ tmpW32 += taps 4 and 5.
+  ldrh r10, [r3], #-14        @ Tap 8 sample; r3 nets +2 bytes, i.e. the next sample's window.
+  ldrh  r4, [r9], #-16        @ coefficient[8]; r9 back to &coefficient[0].
+  smlad r2, r11, r5, r2       @ tmpW32 += taps 6 and 7.
+  smlabb r2, r10, r4, r2      @ tmpW32 += tap 8 (bottom halfwords only).
+
+  @ Saturate to avoid overflow in tmpW16.
+  asr r2, #1                  @ Halve first so adding the rounding constant cannot overflow.
+  add r4, r2, #0x1000         @ Rounding constant for the shift by 13 below.
+  ssat r7, #16, r4, asr #13   @ tmpW16: shift right by 13 and saturate to 16 bits.
+
+  @ Shift low pass filter state, and execute the low pass filter.
+  @ The memmove() and the low pass filter loop are unrolled and mixed.
+  smulbb r5, r1, r7           @ gain * tmpW16
+  add r7, r5, #0x800          @ Rounding constant for the shift by 12 below.
+  asr r7, #12                 @ Get the value for inputState[0].
+  ldr r11, [sp, #40]          @ inputState
+  pld [r11]
+  adr r10, kDampFilter        @ Address of the filter tap table defined after this routine.
+  ldrsh r4, [r10], #2         @ kDampFilter[0]
+  mul r2, r7, r4              @ tmpW32 = new inputState[0] * kDampFilter[0]
+  ldr r4, [r11]               @ inputState[0, 1], before shift.
+  strh r7, [r11]              @ inputState[0], after shift.
+  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
+  ldr r7, [r10], #4           @ kDampFilter[1, 2]
+  ldr r10, [r10]              @ kDampFilter[3, 4]
+  str r4, [r11, #2]           @ inputState[1, 2], after shift.
+  str r5, [r11, #6]           @ inputState[3, 4], after shift.
+  smlad r2, r4, r7, r2        @ tmpW32 += shifted inputState[1, 2] * kDampFilter[1, 2]
+  smlad r2, r5, r10, r2       @ tmpW32 += shifted inputState[3, 4] * kDampFilter[3, 4]
+
+  @ Saturate to avoid overflow.
+  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
+  @ to avoid overflow in the next saturation step.
+  asr r2, #1
+  add r10, r2, #0x2000        @ Rounding constant for the shift by 14 below.
+  ssat r10, #16, r10, asr #14 @ Shift right by 14 and saturate to 16 bits.
+
+  @ Subtract from input and update buffer.
+  ldr r11, [sp, #4]           @ sign
+  ldrsh r4, [r8]              @ inputBuf[*index2], minuend for the subtraction below.
+  ldrsh r7, [r8], #2          @ inputBuf[*index2]
+  smulbb r5, r11, r10         @ sign * filtered value
+  subs r0, #1                 @ Decrement loop counter; sets the flags tested by bgt.
+  sub r4, r5                  @ inputBuf[*index2] - sign * filtered value
+  ssat r2, #16, r4            @ Saturate the difference to 16 bits.
+  strh  r2, [r12], #2         @ outputBuf[*index2]
+
+  add r2, r7                  @ Saturated difference + inputBuf[*index2]
+  ssat r2, #16, r2            @ Saturate the sum to 16 bits.
+  strh  r2, [r6], #2          @ outputBuf2[*index2 + PITCH_BUFFSIZE]
+  bgt LOOP                    @ Repeat while the counter is still positive.
+
+  add sp, #8                  @ Release the 8 bytes of scratch.
+  pop {r4-r11}                @ Restore the registers saved on entry.
+  bx  lr                      @ Return to caller.
+.fnend
+
+.align  2                     @ Align the table to a 2^2 = 4-byte boundary.
+kDampFilter:
+  .short  -2294, 8192, 20972, 8192, -2294  @ Five 16-bit low-pass filter taps read by the loop above.