Assembly coding for pitch filter in iSAC for ARMv6.

Review URL: https://webrtc-codereview.appspot.com/631004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2501 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2012-07-10 19:30:57 +00:00
parent e2c16a83bc
commit adf8ddf4aa
4 changed files with 182 additions and 16 deletions

View File

@ -1,4 +1,4 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
@ -46,8 +46,10 @@ LOCAL_SRC_FILES := \
transform.c
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
# The .S (instead of .s) extension is used so that a C header file can be included in assembly.
LOCAL_SRC_FILES += \
lattice_armv7.S
lattice_armv7.S \
pitchfilter_armv6.S
else
LOCAL_SRC_FILES += \
lattice_c.c
@ -84,7 +86,7 @@ LOCAL_MODULE := libwebrtc_isacfix_neon
LOCAL_MODULE_TAGS := optional
LOCAL_SRC_FILES := \
filters_neon.c \
lattice_neon.S #.S extention is for including a header file in assembly.
lattice_neon.S
# Flags passed to both C and C++ files.
LOCAL_CFLAGS := \

View File

@ -42,6 +42,17 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16 *indatFix,
WebRtc_Word16 *gainsQ12,
WebRtc_Word16 type);
// Core per-sample loop of the pitch filter. For each of |loopNumber| samples
// it interpolates the fractional-pitch value from the history in the
// outputBuf2 argument, scales it by |gain|, runs it through the damping
// low-pass filter held in |inputState|, subtracts sign * filtered value from
// the input sample, and stores the results.
//
// The C definition in pitch_filter.c is compiled only when
// WEBRTC_ARCH_ARM_V7A is not defined; the ARM assembly routine of the same
// name (pitchfilter_armv6.S) follows the same argument order.
void WebRtcIsacfix_PitchFilterCore(int loopNumber,  // Number of samples to process.
WebRtc_Word16 gain,  // Scales the interpolated value; product is rounded and shifted right by 12.
int index,  // Lag offset: history is read starting at PITCH_BUFFSIZE - (index + 2).
WebRtc_Word16 sign,  // Multiplies the filtered value before it is subtracted from the input.
WebRtc_Word16* inputState,  // Low-pass filter state, shifted by one element per sample.
WebRtc_Word16* outputBuff2,  // History buffer; also written at [*index2 + PITCH_BUFFSIZE].
const WebRtc_Word16* coefficient,  // Fractional-pitch interpolation taps.
WebRtc_Word16* inputBuf,  // Input samples, read at [*index2].
WebRtc_Word16* outputBuf,  // Output samples, written at [*index2].
int* index2);  // In/out sample index; advanced by one for every processed sample.
void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0,
PitchFiltstr *pfp,
WebRtc_Word16 *lagsQ7,

View File

@ -55,18 +55,19 @@ static __inline WebRtc_Word32 CalcLrIntQ(WebRtc_Word32 fixVal,
return intgr;
}
#ifndef WEBRTC_ARCH_ARM_V7A
// Pitch filtering.
// TODO(Turaj): Add descriptions of input and output parameters.
static void PitchFilter(int loopNumber,
WebRtc_Word16 gain,
int index,
WebRtc_Word16 sign,
WebRtc_Word16* inputState,
WebRtc_Word16* outputBuf2,
const WebRtc_Word16* coefficient,
WebRtc_Word16* inputBuf,
WebRtc_Word16* outputBuf,
int* index2) {
void WebRtcIsacfix_PitchFilterCore(int loopNumber,
WebRtc_Word16 gain,
int index,
WebRtc_Word16 sign,
WebRtc_Word16* inputState,
WebRtc_Word16* outputBuf2,
const WebRtc_Word16* coefficient,
WebRtc_Word16* inputBuf,
WebRtc_Word16* outputBuf,
int* index2) {
int i = 0, j = 0; // Loop counters.
WebRtc_Word16* ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)];
WebRtc_Word16 tmpW16 = 0;
@ -112,6 +113,11 @@ static void PitchFilter(int loopNumber,
(*index2)++;
}
}
#else
// These two conditions are assumed by the ARM assembly implementation.
WEBRTC_STATIC_ASSERT(PITCH_FRACORDER, PITCH_FRACORDER == 9);
WEBRTC_STATIC_ASSERT(PITCH_DAMPORDER, PITCH_DAMPORDER == 5);
#endif
void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
// Q0 if type is 2.
@ -192,8 +198,8 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
fracoeffQQ = kIntrpCoef[frcQQ];
// Pitch filtering.
PitchFilter(PITCH_SUBFRAME_LEN / kSegments, curGainQ12, indW32, sign,
inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
WebRtcIsacfix_PitchFilterCore(PITCH_SUBFRAME_LEN / kSegments, curGainQ12,
indW32, sign, inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
}
}
@ -206,7 +212,7 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
if (type == 2) {
// Filter look-ahead segment.
PitchFilter(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
WebRtcIsacfix_PitchFilterCore(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
}
}

View File

@ -0,0 +1,147 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARMv6 platforms.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.
#include "settings.h"
.arch armv6
.align 2
.global WebRtcIsacfix_PitchFilterCore
@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    WebRtc_Word16 gain,
@                                    int index,
@                                    WebRtc_Word16 sign,
@                                    WebRtc_Word16* inputState,
@                                    WebRtc_Word16* outputBuf2,
@                                    const WebRtc_Word16* coefficient,
@                                    WebRtc_Word16* inputBuf,
@                                    WebRtc_Word16* outputBuf,
@                                    int* index2);
@
@ Syntax/mode: GNU as, ARM (A32) instructions, run through the C preprocessor
@ (PITCH_BUFFSIZE comes from settings.h).
@
@ In:    r0 = loopNumber, r1 = gain, r2 = index, r3 = sign
@        caller stack: inputState, outputBuf2, coefficient, inputBuf,
@                      outputBuf, index2 (in that order)
@ Out:   *index2 is advanced by loopNumber; outputBuf[], outputBuf2[] and
@        inputState[] are updated. No return value.
@ Saves: r4-r11 (pushed and restored). Uses r0-r3, r12 and the flags freely.
@ Stack: 32 bytes of saved registers + 8 bytes of locals, so the stacked
@        arguments are found at [sp, #40] .. [sp, #60] inside the function.
@        [sp, #4] holds the spilled sign; [sp, #0] is padding that keeps the
@        frame a multiple of 8 bytes.
@
@ Assumptions made by the unrolled code: 9 interpolation taps and 5 damping
@ filter taps.
@
@ NOTE: several word accesses are not 4-byte aligned (r3 advances by one
@ halfword per sample, inputState is stored at byte offsets 2 and 6, and
@ kDampFilter[1..4] is read starting at byte offset 2), so the target must
@ permit unaligned word loads/stores.

        .type   WebRtcIsacfix_PitchFilterCore, %function
WebRtcIsacfix_PitchFilterCore:
        .fnstart
        .save   {r4-r11}
        push    {r4-r11}
        .pad    #8
        sub     sp, sp, #8

        @ Nothing to do for loopNumber <= 0. Without this check the
        @ bottom-tested loop would still process one sample.
        cmp     r0, #0
        ble     .Lpitch_filter_done

        str     r3, [sp, #4]            @ Spill sign; r3 is reused as a pointer.

        ldr     r3, [sp, #44]           @ outputBuf2
        ldr     r6, [sp, #60]           @ index2
        ldr     r7, [r6]                @ *index2
        ldr     r8, [sp, #52]           @ inputBuf
        ldr     r12, [sp, #56]          @ outputBuf

        add     r4, r7, r0
        str     r4, [r6]                @ *index2 += loopNumber (final value).

        mov     r10, r7, asl #1         @ Byte offset of element *index2.
        add     r12, r12, r10           @ &outputBuf[*index2]
        add     r8, r8, r10             @ &inputBuf[*index2]

        add     r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
        add     r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
        sub     r4, r4, r2              @ r2: index
        sub     r4, r4, #2              @ *index2 + PITCH_BUFFSIZE - index - 2
        add     r3, r3, r4, lsl #1      @ &ubufQQpos2[*index2]

        ldr     r9, [sp, #48]           @ coefficient

.Lpitch_filter_loop:
        @ Register usage inside the loop:
        @   r0:  remaining sample count (> 0 on entry to every iteration)
        @   r1:  gain
        @   r2:  32-bit accumulator (tmpW32)
        @   r3:  &ubufQQpos2[current sample]
        @   r6:  &outputBuf2[current sample + PITCH_BUFFSIZE]
        @   r8:  &inputBuf[current sample]
        @   r9:  &coefficient[0]
        @   r12: &outputBuf[current sample]
        @   r4, r5, r7, r10, r11: scratch

        @ Filter to get fractional pitch.
        @ The loop over the 9 taps is fully unrolled: four dual 16x16
        @ multiply-accumulates plus one single multiply-accumulate.
        pld     [r3]
        ldr     r10, [r3], #4           @ ubufQQpos2[+0, +1]
        ldr     r4, [r9], #4            @ coefficient[0, 1]
        ldr     r11, [r3], #4           @ ubufQQpos2[+2, +3]
        ldr     r5, [r9], #4            @ coefficient[2, 3]
        smuad   r2, r10, r4
        smlad   r2, r11, r5, r2
        ldr     r10, [r3], #4           @ ubufQQpos2[+4, +5]
        ldr     r4, [r9], #4            @ coefficient[4, 5]
        ldr     r11, [r3], #4           @ ubufQQpos2[+6, +7]
        ldr     r5, [r9], #4            @ coefficient[6, 7]
        smlad   r2, r10, r4, r2
        ldrh    r10, [r3], #-14         @ ubufQQpos2[+8]; r3 ends one sample
                                        @ past where it started (16 - 14 = 2
                                        @ bytes), ready for the next iteration.
        ldrh    r4, [r9], #-16          @ coefficient[8]; r9 back to
                                        @ &coefficient[0].
        smlad   r2, r11, r5, r2
        smlabb  r2, r10, r4, r2         @ Only the low halfwords are used, so
                                        @ the zero-extending ldrh is fine.

        @ Round, shift right by 14 in total and saturate to 16 bits.
        @ The shift is split as 1 + 13 so that adding the rounding constant
        @ cannot overflow.
        asr     r2, r2, #1
        add     r4, r2, #0x1000
        ssat    r7, #16, r4, asr #13

        @ Shift the low pass filter state and execute the low pass filter.
        @ The state shift (memmove) and the filter taps are unrolled and
        @ interleaved.
        smulbb  r5, r1, r7
        add     r7, r5, #0x800
        asr     r7, r7, #12             @ New value for inputState[0].

        ldr     r11, [sp, #40]          @ inputState
        pld     [r11]
        adr     r10, kDampFilter
        ldrsh   r4, [r10], #2           @ kDampFilter[0]
        smulbb  r2, r7, r4              @ Use the same 16-bit value that the
                                        @ strh below stores, so this sample
                                        @ and later ones see an identical
                                        @ inputState[0].
        ldr     r4, [r11]               @ inputState[0, 1], before shift.
        strh    r7, [r11]               @ inputState[0], after shift.
        ldr     r5, [r11, #4]           @ inputState[2, 3], before shift.
        ldr     r7, [r10], #4           @ kDampFilter[1, 2]
        ldr     r10, [r10]              @ kDampFilter[3, 4]
        str     r4, [r11, #2]           @ inputState[1, 2], after shift.
        str     r5, [r11, #6]           @ inputState[3, 4], after shift.
        smlad   r2, r4, r7, r2
        smlad   r2, r5, r10, r2

        @ Round, shift right by 15 in total and saturate to 16 bits.
        @ First halve the sum into the range [0xC0000000, 0x3FFFFFFF] so that
        @ adding the rounding constant cannot overflow.
        asr     r2, r2, #1
        add     r10, r2, #0x2000
        ssat    r10, #16, r10, asr #14

        @ Subtract from input and update buffers.
        ldr     r11, [sp, #4]           @ sign
        ldrsh   r7, [r8], #2            @ inputBuf[*index2]
        smulbb  r5, r11, r10
        subs    r0, r0, #1              @ Flags are consumed by bgt below;
                                        @ nothing in between alters N/Z/V.
        sub     r4, r7, r5
        ssat    r2, #16, r4
        strh    r2, [r12], #2           @ outputBuf[*index2]
        add     r2, r2, r7
        ssat    r2, #16, r2
        strh    r2, [r6], #2            @ outputBuf2[*index2 + PITCH_BUFFSIZE]
        bgt     .Lpitch_filter_loop

.Lpitch_filter_done:
        add     sp, sp, #8
        pop     {r4-r11}
        bx      lr
        .fnend
        .size   WebRtcIsacfix_PitchFilterCore, .-WebRtcIsacfix_PitchFilterCore

@ Five 16-bit taps of the damping low-pass filter applied to inputState[0..4].
@ Kept next to the code so that it is reachable with adr.
        .align  2
kDampFilter:
        .short  -2294, 8192, 20972, 8192, -2294