Assembly coding for pitch filter in iSAC for ARMv6.
Review URL: https://webrtc-codereview.appspot.com/631004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2501 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -1,4 +1,4 @@ | |||||||
| # Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | # Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||||
| # | # | ||||||
| # Use of this source code is governed by a BSD-style license | # Use of this source code is governed by a BSD-style license | ||||||
| # that can be found in the LICENSE file in the root of the source | # that can be found in the LICENSE file in the root of the source | ||||||
| @@ -46,8 +46,10 @@ LOCAL_SRC_FILES := \ | |||||||
|     transform.c |     transform.c | ||||||
|  |  | ||||||
| ifeq ($(ARCH_ARM_HAVE_ARMV7A),true) | ifeq ($(ARCH_ARM_HAVE_ARMV7A),true) | ||||||
|  | # The .S (instead of .s) extension is used so that a C header file can be included in assembly. | ||||||
| LOCAL_SRC_FILES += \ | LOCAL_SRC_FILES += \ | ||||||
|     lattice_armv7.S |     lattice_armv7.S \ | ||||||
|  |     pitchfilter_armv6.S | ||||||
| else | else | ||||||
| LOCAL_SRC_FILES += \ | LOCAL_SRC_FILES += \ | ||||||
|     lattice_c.c |     lattice_c.c | ||||||
| @@ -84,7 +86,7 @@ LOCAL_MODULE := libwebrtc_isacfix_neon | |||||||
| LOCAL_MODULE_TAGS := optional | LOCAL_MODULE_TAGS := optional | ||||||
| LOCAL_SRC_FILES := \ | LOCAL_SRC_FILES := \ | ||||||
|     filters_neon.c \ |     filters_neon.c \ | ||||||
|     lattice_neon.S #.S extention is for including a header file in assembly. |     lattice_neon.S | ||||||
|  |  | ||||||
| # Flags passed to both C and C++ files. | # Flags passed to both C and C++ files. | ||||||
| LOCAL_CFLAGS := \ | LOCAL_CFLAGS := \ | ||||||
|   | |||||||
| @@ -42,6 +42,17 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16 *indatFix, | |||||||
|                                WebRtc_Word16 *gainsQ12, |                                WebRtc_Word16 *gainsQ12, | ||||||
|                                WebRtc_Word16 type); |                                WebRtc_Word16 type); | ||||||
|  |  | ||||||
|  | void WebRtcIsacfix_PitchFilterCore(int loopNumber, | ||||||
|  |                                    WebRtc_Word16 gain, | ||||||
|  |                                    int index, | ||||||
|  |                                    WebRtc_Word16 sign, | ||||||
|  |                                    WebRtc_Word16* inputState, | ||||||
|  |                                    WebRtc_Word16* outputBuff2, | ||||||
|  |                                    const WebRtc_Word16* coefficient, | ||||||
|  |                                    WebRtc_Word16* inputBuf, | ||||||
|  |                                    WebRtc_Word16* outputBuf, | ||||||
|  |                                    int* index2); | ||||||
|  |  | ||||||
| void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0, | void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0, | ||||||
|                                     PitchFiltstr *pfp, |                                     PitchFiltstr *pfp, | ||||||
|                                     WebRtc_Word16 *lagsQ7, |                                     WebRtc_Word16 *lagsQ7, | ||||||
|   | |||||||
| @@ -55,9 +55,10 @@ static __inline WebRtc_Word32 CalcLrIntQ(WebRtc_Word32 fixVal, | |||||||
|   return intgr; |   return intgr; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #ifndef WEBRTC_ARCH_ARM_V7A | ||||||
| // Pitch filtering. | // Pitch filtering. | ||||||
| // TODO(Turaj): Add descriptions of input and output parameters. | // TODO(Turaj): Add descriptions of input and output parameters. | ||||||
| static void PitchFilter(int loopNumber, | void WebRtcIsacfix_PitchFilterCore(int loopNumber, | ||||||
|                                    WebRtc_Word16 gain, |                                    WebRtc_Word16 gain, | ||||||
|                                    int index, |                                    int index, | ||||||
|                                    WebRtc_Word16 sign, |                                    WebRtc_Word16 sign, | ||||||
| @@ -112,6 +113,11 @@ static void PitchFilter(int loopNumber, | |||||||
|     (*index2)++; |     (*index2)++; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  | #else | ||||||
|  | // The ARM assembly implementation assumes these two conditions. | ||||||
|  | WEBRTC_STATIC_ASSERT(PITCH_FRACORDER, PITCH_FRACORDER == 9); | ||||||
|  | WEBRTC_STATIC_ASSERT(PITCH_DAMPORDER, PITCH_DAMPORDER == 5); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4, | void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4, | ||||||
|                                                        // Q0 if type is 2. |                                                        // Q0 if type is 2. | ||||||
| @@ -192,8 +198,8 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4, | |||||||
|       fracoeffQQ = kIntrpCoef[frcQQ]; |       fracoeffQQ = kIntrpCoef[frcQQ]; | ||||||
|  |  | ||||||
|       // Pitch filtering. |       // Pitch filtering. | ||||||
|       PitchFilter(PITCH_SUBFRAME_LEN / kSegments, curGainQ12, indW32, sign, |       WebRtcIsacfix_PitchFilterCore(PITCH_SUBFRAME_LEN / kSegments, curGainQ12, | ||||||
|           inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind); |         indW32, sign, inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -206,7 +212,7 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4, | |||||||
|  |  | ||||||
|   if (type == 2) { |   if (type == 2) { | ||||||
|     // Filter look-ahead segment. |     // Filter look-ahead segment. | ||||||
|     PitchFilter(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ, |     WebRtcIsacfix_PitchFilterCore(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ, | ||||||
|                 ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind); |                 ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -0,0 +1,147 @@ | |||||||
|  | @ | ||||||
|  | @ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||||
|  | @ | ||||||
|  | @ Use of this source code is governed by a BSD-style license | ||||||
|  | @ that can be found in the LICENSE file in the root of the source | ||||||
|  | @ tree. An additional intellectual property rights grant can be found | ||||||
|  | @ in the file PATENTS.  All contributing project authors may | ||||||
|  | @ be found in the AUTHORS file in the root of the source tree. | ||||||
|  | @ | ||||||
|  |  | ||||||
|  | @ Contains the core loop routine for the pitch filter function in iSAC, | ||||||
|  | @ optimized for ARM platforms (ARMv6 instruction set; built for ARMv7-A targets). | ||||||
|  | @ | ||||||
|  | @ Output is bit-exact with the reference C code in pitch_filter.c. | ||||||
|  |  | ||||||
|  | #include "settings.h" | ||||||
|  |  | ||||||
|  | .arch armv6 | ||||||
|  | .align  2 | ||||||
|  | .global WebRtcIsacfix_PitchFilterCore | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @ void WebRtcIsacfix_PitchFilterCore(int loopNumber, | ||||||
|  | @                                    WebRtc_Word16 gain, | ||||||
|  | @                                    int index, | ||||||
|  | @                                    WebRtc_Word16 sign, | ||||||
|  | @                                    WebRtc_Word16* inputState, | ||||||
|  | @                                    WebRtc_Word16* outputBuf2, | ||||||
|  | @                                    const WebRtc_Word16* coefficient, | ||||||
|  | @                                    WebRtc_Word16* inputBuf, | ||||||
|  | @                                    WebRtc_Word16* outputBuf, | ||||||
|  | @                                    int* index2) { | ||||||
|  |  | ||||||
|  | WebRtcIsacfix_PitchFilterCore: | ||||||
|  | .fnstart | ||||||
|  |   push {r4-r11} | ||||||
|  |   sub sp, #8 | ||||||
|  |  | ||||||
|  |   str r0, [sp]                @ Save loopNumber in the local slot at [sp]. | ||||||
|  |   str r3, [sp, #4]            @ Save sign at [sp, #4]; it is reloaded inside the loop. | ||||||
|  |   ldr r3, [sp, #44]           @ outputBuf2 (stack args start at sp + 40: 32 pushed + 8 local bytes) | ||||||
|  |   ldr r6, [sp, #60]           @ index2 (pointer) | ||||||
|  |   ldr r7, [r6]                @ *index2 on entry | ||||||
|  |   ldr r8, [sp, #52]           @ inputBuf | ||||||
|  |   ldr r12, [sp, #56]          @ outputBuf | ||||||
|  |  | ||||||
|  |   add r4, r7, r0 | ||||||
|  |   str r4, [r6]                @ Write *index2 + loopNumber back to *index2 up front. | ||||||
|  |  | ||||||
|  |   mov r10, r7, asl #1 | ||||||
|  |   add r12, r10                @ &outputBuf[*index2] | ||||||
|  |   add r8, r10                 @ &inputBuf[*index2] | ||||||
|  |  | ||||||
|  |   add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE | ||||||
|  |   add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE] | ||||||
|  |   sub r4, r2                  @ r2: index | ||||||
|  |   sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2 | ||||||
|  |   add r3, r4, lsl #1          @ &ubufQQpos2[*index2], i.e. outputBuf2 + 2 * r4 bytes | ||||||
|  |   ldr r9, [sp, #48]           @ coefficient | ||||||
|  |  | ||||||
|  | LOOP: | ||||||
|  | @ Usage of registers in the loop: | ||||||
|  | @  r0: loop counter (counts down; the loop repeats while it is > 0) | ||||||
|  | @  r1: gain | ||||||
|  | @  r2: tmpW32 | ||||||
|  | @  r3: &ubufQQpos2[] (advances by one 16-bit element per iteration) | ||||||
|  | @  r6: &outputBuf2[] | ||||||
|  | @  r8: &inputBuf[] | ||||||
|  | @  r9: &coefficient[] | ||||||
|  | @  r12: &outputBuf[] | ||||||
|  | @  r4, r5, r7, r10, r11: scratch | ||||||
|  |  | ||||||
|  |   @ Filter to get fractional pitch. | ||||||
|  |   @ The pitch filter loop here is unrolled with 9 multiplications. | ||||||
|  |   pld [r3] | ||||||
|  |   ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1] | ||||||
|  |   ldr r4, [r9], #4            @ coefficient[0, 1] | ||||||
|  |   ldr r11, [r3], #4 | ||||||
|  |   ldr r5, [r9], #4 | ||||||
|  |   smuad r2, r10, r4 | ||||||
|  |   smlad r2, r11, r5, r2 | ||||||
|  |  | ||||||
|  |   ldr r10, [r3], #4 | ||||||
|  |   ldr r4, [r9], #4 | ||||||
|  |   ldr r11, [r3], #4 | ||||||
|  |   ldr r5, [r9], #4 | ||||||
|  |   smlad r2, r10, r4, r2 | ||||||
|  |   ldrh r10, [r3], #-14        @ 9th sample; net +2 bytes, so r3 is at the next ubufQQpos2 element. | ||||||
|  |   ldrh  r4, [r9], #-16        @ 9th coefficient; r9 back to &coefficient[0]. | ||||||
|  |   smlad r2, r11, r5, r2 | ||||||
|  |   smlabb r2, r10, r4, r2 | ||||||
|  |  | ||||||
|  |   @ Saturate to avoid overflow in tmpW16. | ||||||
|  |   asr r2, #1 | ||||||
|  |   add r4, r2, #0x1000 | ||||||
|  |   ssat r7, #16, r4, asr #13 | ||||||
|  |  | ||||||
|  |   @ Shift low pass filter state, and execute the low pass filter. | ||||||
|  |   @ The memmove() and the low pass filter loop are unrolled and mixed. | ||||||
|  |   smulbb r5, r1, r7 | ||||||
|  |   add r7, r5, #0x800 | ||||||
|  |   asr r7, #12                 @ Get the value for inputState[0]. | ||||||
|  |   ldr r11, [sp, #40]          @ inputState (first stack argument) | ||||||
|  |   pld [r11] | ||||||
|  |   adr r10, kDampFilter | ||||||
|  |   ldrsh r4, [r10], #2         @ kDampFilter[0] | ||||||
|  |   mul r2, r7, r4 | ||||||
|  |   ldr r4, [r11]               @ inputState[0, 1], before shift. | ||||||
|  |   strh r7, [r11]              @ inputState[0], after shift. | ||||||
|  |   ldr r5, [r11, #4]           @ inputState[2, 3], before shift. | ||||||
|  |   ldr r7, [r10], #4           @ kDampFilter[1, 2] | ||||||
|  |   ldr r10, [r10]              @ kDampFilter[3, 4] | ||||||
|  |   str r4, [r11, #2]           @ inputState[1, 2], after shift. | ||||||
|  |   str r5, [r11, #6]           @ inputState[3, 4], after shift. | ||||||
|  |   smlad r2, r4, r7, r2 | ||||||
|  |   smlad r2, r5, r10, r2 | ||||||
|  |  | ||||||
|  |   @ Saturate to avoid overflow. | ||||||
|  |   @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF], | ||||||
|  |   @ to avoid overflow in the next saturation step. | ||||||
|  |   asr r2, #1 | ||||||
|  |   add r10, r2, #0x2000 | ||||||
|  |   ssat r10, #16, r10, asr #14 | ||||||
|  |  | ||||||
|  |   @ Subtract from input and update buffers; subs below sets the flags that bgt tests. | ||||||
|  |   ldr r11, [sp, #4]           @ sign (saved at function entry) | ||||||
|  |   ldrsh r4, [r8] | ||||||
|  |   ldrsh r7, [r8], #2          @ inputBuf[*index2] again (also in r4); r8 moves to the next sample. | ||||||
|  |   smulbb r5, r11, r10 | ||||||
|  |   subs r0, #1 | ||||||
|  |   sub r4, r5 | ||||||
|  |   ssat r2, #16, r4 | ||||||
|  |   strh  r2, [r12], #2         @ outputBuf[*index2] | ||||||
|  |  | ||||||
|  |   add r2, r7 | ||||||
|  |   ssat r2, #16, r2 | ||||||
|  |   strh  r2, [r6], #2          @ outputBuff2[*index2 + PITCH_BUFFSIZE] | ||||||
|  |   bgt LOOP | ||||||
|  |  | ||||||
|  |   add sp, #8 | ||||||
|  |   pop {r4-r11} | ||||||
|  |   bx  lr | ||||||
|  | .fnend | ||||||
|  |  | ||||||
|  | .align  2 | ||||||
|  | kDampFilter: | ||||||
|  |   .short  -2294, 8192, 20972, 8192, -2294 | ||||||
		Reference in New Issue
	
	Block a user
	 kma@webrtc.org
					kma@webrtc.org