Assembly coding for pitch filter in iSAC for ARMv6.
Review URL: https://webrtc-codereview.appspot.com/631004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2501 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
e2c16a83bc
commit
adf8ddf4aa
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
@ -46,8 +46,10 @@ LOCAL_SRC_FILES := \
|
||||
transform.c
|
||||
|
||||
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
||||
# The .S (instead of .s) extension is used so that a C header file can be included in assembly.
|
||||
LOCAL_SRC_FILES += \
|
||||
lattice_armv7.S
|
||||
lattice_armv7.S \
|
||||
pitchfilter_armv6.S
|
||||
else
|
||||
LOCAL_SRC_FILES += \
|
||||
lattice_c.c
|
||||
@ -84,7 +86,7 @@ LOCAL_MODULE := libwebrtc_isacfix_neon
|
||||
LOCAL_MODULE_TAGS := optional
|
||||
LOCAL_SRC_FILES := \
|
||||
filters_neon.c \
|
||||
lattice_neon.S #.S extension is for including a header file in assembly.
|
||||
lattice_neon.S
|
||||
|
||||
# Flags passed to both C and C++ files.
|
||||
LOCAL_CFLAGS := \
|
||||
|
@ -42,6 +42,17 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16 *indatFix,
|
||||
WebRtc_Word16 *gainsQ12,
|
||||
WebRtc_Word16 type);
|
||||
|
||||
void WebRtcIsacfix_PitchFilterCore(int loopNumber,
|
||||
WebRtc_Word16 gain,
|
||||
int index,
|
||||
WebRtc_Word16 sign,
|
||||
WebRtc_Word16* inputState,
|
||||
WebRtc_Word16* outputBuff2,
|
||||
const WebRtc_Word16* coefficient,
|
||||
WebRtc_Word16* inputBuf,
|
||||
WebRtc_Word16* outputBuf,
|
||||
int* index2);
|
||||
|
||||
void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0,
|
||||
PitchFiltstr *pfp,
|
||||
WebRtc_Word16 *lagsQ7,
|
||||
|
@ -55,18 +55,19 @@ static __inline WebRtc_Word32 CalcLrIntQ(WebRtc_Word32 fixVal,
|
||||
return intgr;
|
||||
}
|
||||
|
||||
#ifndef WEBRTC_ARCH_ARM_V7A
|
||||
// Pitch filtering.
|
||||
// TODO(Turaj): Add descriptions of input and output parameters.
|
||||
static void PitchFilter(int loopNumber,
|
||||
WebRtc_Word16 gain,
|
||||
int index,
|
||||
WebRtc_Word16 sign,
|
||||
WebRtc_Word16* inputState,
|
||||
WebRtc_Word16* outputBuf2,
|
||||
const WebRtc_Word16* coefficient,
|
||||
WebRtc_Word16* inputBuf,
|
||||
WebRtc_Word16* outputBuf,
|
||||
int* index2) {
|
||||
void WebRtcIsacfix_PitchFilterCore(int loopNumber,
|
||||
WebRtc_Word16 gain,
|
||||
int index,
|
||||
WebRtc_Word16 sign,
|
||||
WebRtc_Word16* inputState,
|
||||
WebRtc_Word16* outputBuf2,
|
||||
const WebRtc_Word16* coefficient,
|
||||
WebRtc_Word16* inputBuf,
|
||||
WebRtc_Word16* outputBuf,
|
||||
int* index2) {
|
||||
int i = 0, j = 0; // Loop counters.
|
||||
WebRtc_Word16* ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)];
|
||||
WebRtc_Word16 tmpW16 = 0;
|
||||
@ -112,6 +113,11 @@ static void PitchFilter(int loopNumber,
|
||||
(*index2)++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// These two conditions are assumed by the ARM assembly file.
|
||||
WEBRTC_STATIC_ASSERT(PITCH_FRACORDER, PITCH_FRACORDER == 9);
|
||||
WEBRTC_STATIC_ASSERT(PITCH_DAMPORDER, PITCH_DAMPORDER == 5);
|
||||
#endif
|
||||
|
||||
void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
||||
// Q0 if type is 2.
|
||||
@ -192,8 +198,8 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
||||
fracoeffQQ = kIntrpCoef[frcQQ];
|
||||
|
||||
// Pitch filtering.
|
||||
PitchFilter(PITCH_SUBFRAME_LEN / kSegments, curGainQ12, indW32, sign,
|
||||
inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
||||
WebRtcIsacfix_PitchFilterCore(PITCH_SUBFRAME_LEN / kSegments, curGainQ12,
|
||||
indW32, sign, inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
||||
}
|
||||
}
|
||||
|
||||
@ -206,7 +212,7 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
||||
|
||||
if (type == 2) {
|
||||
// Filter look-ahead segment.
|
||||
PitchFilter(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
|
||||
WebRtcIsacfix_PitchFilterCore(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
|
||||
ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,147 @@
|
||||
@
|
||||
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
@
|
||||
|
||||
@ Contains the core loop routine for the pitch filter function in iSAC,
|
||||
@ optimized for ARMv6 and later (including ARMv7) platforms.
|
||||
@
|
||||
@ Output is bit-exact with the reference C code in pitch_filter.c.
|
||||
|
||||
#include "settings.h"
|
||||
|
||||
.arch armv6
|
||||
.align 2
|
||||
.global WebRtcIsacfix_PitchFilterCore
|
||||
|
||||
|
||||
@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
|
||||
@ WebRtc_Word16 gain,
|
||||
@ int index,
|
||||
@ WebRtc_Word16 sign,
|
||||
@ WebRtc_Word16* inputState,
|
||||
@ WebRtc_Word16* outputBuf2,
|
||||
@ const WebRtc_Word16* coefficient,
|
||||
@ WebRtc_Word16* inputBuf,
|
||||
@ WebRtc_Word16* outputBuf,
|
||||
@ int* index2) {
|
||||
|
||||
WebRtcIsacfix_PitchFilterCore:
|
||||
.fnstart
|
||||
push {r4-r11}
|
||||
sub sp, #8
|
||||
|
||||
str r0, [sp] @ loopNumber
|
||||
str r3, [sp, #4] @ sign
|
||||
ldr r3, [sp, #44] @ outputBuf2
|
||||
ldr r6, [sp, #60] @ index2
|
||||
ldr r7, [r6] @ *index2
|
||||
ldr r8, [sp, #52] @ inputBuf
|
||||
ldr r12, [sp, #56] @ outputBuf
|
||||
|
||||
add r4, r7, r0
|
||||
str r4, [r6] @ Store return value to index2.
|
||||
|
||||
mov r10, r7, asl #1
|
||||
add r12, r10 @ &outputBuf[*index2]
|
||||
add r8, r10 @ &inputBuf[*index2]
|
||||
|
||||
add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
|
||||
add r6, r3, r4, lsl #1 @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
|
||||
sub r4, r2 @ r2: index
|
||||
sub r4, #2 @ *index2 + PITCH_BUFFSIZE - index - 2
|
||||
add r3, r4, lsl #1 @ &ubufQQpos2[*index2]
|
||||
ldr r9, [sp, #48] @ coefficient
|
||||
|
||||
LOOP:
|
||||
@ Usage of registers in the loop:
|
||||
@ r0: loop counter
|
||||
@ r1: gain
|
||||
@ r2: tmpW32
|
||||
@ r3: &ubufQQpos2[]
|
||||
@ r6: &outputBuf2[]
|
||||
@ r8: &inputBuf[]
|
||||
@ r9: &coefficient[]
|
||||
@ r12: &outputBuf[]
|
||||
@ r4, r5, r7, r10, r11: scratch
|
||||
|
||||
@ Filter to get fractional pitch.
|
||||
@ The pitch filter loop here is unrolled with 9 multiplications.
|
||||
pld [r3]
|
||||
ldr r10, [r3], #4 @ ubufQQpos2[*index2 + 0, *index2 + 1]
|
||||
ldr r4, [r9], #4 @ coefficient[0, 1]
|
||||
ldr r11, [r3], #4
|
||||
ldr r5, [r9], #4
|
||||
smuad r2, r10, r4
|
||||
smlad r2, r11, r5, r2
|
||||
|
||||
ldr r10, [r3], #4
|
||||
ldr r4, [r9], #4
|
||||
ldr r11, [r3], #4
|
||||
ldr r5, [r9], #4
|
||||
smlad r2, r10, r4, r2
|
||||
ldrh r10, [r3], #-14 @ 9th sample; r3 ends one sample past this window's start (next iteration).
|
||||
ldrh r4, [r9], #-16 @ r9 back to &coefficient[0].
|
||||
smlad r2, r11, r5, r2
|
||||
smlabb r2, r10, r4, r2
|
||||
|
||||
@ Saturate to avoid overflow in tmpW16.
|
||||
asr r2, #1
|
||||
add r4, r2, #0x1000
|
||||
ssat r7, #16, r4, asr #13
|
||||
|
||||
@ Shift low pass filter state, and execute the low pass filter.
|
||||
@ The memmove() and the low pass filter loop are unrolled and mixed.
|
||||
smulbb r5, r1, r7
|
||||
add r7, r5, #0x800
|
||||
asr r7, #12 @ Get the value for inputState[0].
|
||||
ldr r11, [sp, #40] @ inputState
|
||||
pld [r11]
|
||||
adr r10, kDampFilter
|
||||
ldrsh r4, [r10], #2 @ kDampFilter[0]
|
||||
mul r2, r7, r4
|
||||
ldr r4, [r11] @ inputState[0, 1], before shift.
|
||||
strh r7, [r11] @ inputState[0], after shift.
|
||||
ldr r5, [r11, #4] @ inputState[2, 3], before shift.
|
||||
ldr r7, [r10], #4 @ kDampFilter[1, 2]
|
||||
ldr r10, [r10] @ kDampFilter[3, 4]
|
||||
str r4, [r11, #2] @ inputState[1, 2], after shift.
|
||||
str r5, [r11, #6] @ inputState[3, 4], after shift.
|
||||
smlad r2, r4, r7, r2
|
||||
smlad r2, r5, r10, r2
|
||||
|
||||
@ Saturate to avoid overflow.
|
||||
@ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
|
||||
@ to avoid overflow in the next saturation step.
|
||||
asr r2, #1
|
||||
add r10, r2, #0x2000
|
||||
ssat r10, #16, r10, asr #14
|
||||
|
||||
@ Subtract from input and update buffer.
|
||||
ldr r11, [sp, #4] @ sign
|
||||
ldrsh r4, [r8]
|
||||
ldrsh r7, [r8], #2 @ inputBuf[*index2]
|
||||
smulbb r5, r11, r10
|
||||
subs r0, #1
|
||||
sub r4, r5
|
||||
ssat r2, #16, r4
|
||||
strh r2, [r12], #2 @ outputBuf[*index2]
|
||||
|
||||
add r2, r7
|
||||
ssat r2, #16, r2
|
||||
strh r2, [r6], #2 @ outputBuf2[*index2 + PITCH_BUFFSIZE]
|
||||
bgt LOOP
|
||||
|
||||
add sp, #8
|
||||
pop {r4-r11}
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
.align 2
|
||||
kDampFilter:
|
||||
.short -2294, 8192, 20972, 8192, -2294
|
Loading…
x
Reference in New Issue
Block a user