Assembly coding for pitch filter in iSAC for ARMv6.
Review URL: https://webrtc-codereview.appspot.com/631004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2501 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
e2c16a83bc
commit
adf8ddf4aa
@ -1,4 +1,4 @@
|
|||||||
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
#
|
#
|
||||||
# Use of this source code is governed by a BSD-style license
|
# Use of this source code is governed by a BSD-style license
|
||||||
# that can be found in the LICENSE file in the root of the source
|
# that can be found in the LICENSE file in the root of the source
|
||||||
@ -46,8 +46,10 @@ LOCAL_SRC_FILES := \
|
|||||||
transform.c
|
transform.c
|
||||||
|
|
||||||
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
||||||
|
# The .S (instead of .s) extension is used so that a C header file can be included in the assembly.
|
||||||
LOCAL_SRC_FILES += \
|
LOCAL_SRC_FILES += \
|
||||||
lattice_armv7.S
|
lattice_armv7.S \
|
||||||
|
pitchfilter_armv6.S
|
||||||
else
|
else
|
||||||
LOCAL_SRC_FILES += \
|
LOCAL_SRC_FILES += \
|
||||||
lattice_c.c
|
lattice_c.c
|
||||||
@ -84,7 +86,7 @@ LOCAL_MODULE := libwebrtc_isacfix_neon
|
|||||||
LOCAL_MODULE_TAGS := optional
|
LOCAL_MODULE_TAGS := optional
|
||||||
LOCAL_SRC_FILES := \
|
LOCAL_SRC_FILES := \
|
||||||
filters_neon.c \
|
filters_neon.c \
|
||||||
lattice_neon.S #.S extention is for including a header file in assembly.
|
lattice_neon.S
|
||||||
|
|
||||||
# Flags passed to both C and C++ files.
|
# Flags passed to both C and C++ files.
|
||||||
LOCAL_CFLAGS := \
|
LOCAL_CFLAGS := \
|
||||||
|
@ -42,6 +42,17 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16 *indatFix,
|
|||||||
WebRtc_Word16 *gainsQ12,
|
WebRtc_Word16 *gainsQ12,
|
||||||
WebRtc_Word16 type);
|
WebRtc_Word16 type);
|
||||||
|
|
||||||
|
void WebRtcIsacfix_PitchFilterCore(int loopNumber,
|
||||||
|
WebRtc_Word16 gain,
|
||||||
|
int index,
|
||||||
|
WebRtc_Word16 sign,
|
||||||
|
WebRtc_Word16* inputState,
|
||||||
|
WebRtc_Word16* outputBuff2,
|
||||||
|
const WebRtc_Word16* coefficient,
|
||||||
|
WebRtc_Word16* inputBuf,
|
||||||
|
WebRtc_Word16* outputBuf,
|
||||||
|
int* index2);
|
||||||
|
|
||||||
void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0,
|
void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0,
|
||||||
PitchFiltstr *pfp,
|
PitchFiltstr *pfp,
|
||||||
WebRtc_Word16 *lagsQ7,
|
WebRtc_Word16 *lagsQ7,
|
||||||
|
@ -55,18 +55,19 @@ static __inline WebRtc_Word32 CalcLrIntQ(WebRtc_Word32 fixVal,
|
|||||||
return intgr;
|
return intgr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef WEBRTC_ARCH_ARM_V7A
|
||||||
// Pitch filtering.
|
// Pitch filtering.
|
||||||
// TODO(Turaj): Add descriptions of input and output parameters.
|
// TODO(Turaj): Add descriptions of input and output parameters.
|
||||||
static void PitchFilter(int loopNumber,
|
void WebRtcIsacfix_PitchFilterCore(int loopNumber,
|
||||||
WebRtc_Word16 gain,
|
WebRtc_Word16 gain,
|
||||||
int index,
|
int index,
|
||||||
WebRtc_Word16 sign,
|
WebRtc_Word16 sign,
|
||||||
WebRtc_Word16* inputState,
|
WebRtc_Word16* inputState,
|
||||||
WebRtc_Word16* outputBuf2,
|
WebRtc_Word16* outputBuf2,
|
||||||
const WebRtc_Word16* coefficient,
|
const WebRtc_Word16* coefficient,
|
||||||
WebRtc_Word16* inputBuf,
|
WebRtc_Word16* inputBuf,
|
||||||
WebRtc_Word16* outputBuf,
|
WebRtc_Word16* outputBuf,
|
||||||
int* index2) {
|
int* index2) {
|
||||||
int i = 0, j = 0; // Loop counters.
|
int i = 0, j = 0; // Loop counters.
|
||||||
WebRtc_Word16* ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)];
|
WebRtc_Word16* ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)];
|
||||||
WebRtc_Word16 tmpW16 = 0;
|
WebRtc_Word16 tmpW16 = 0;
|
||||||
@ -112,6 +113,11 @@ static void PitchFilter(int loopNumber,
|
|||||||
(*index2)++;
|
(*index2)++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
// These two conditions are assumed by the ARM assembly file.
|
||||||
|
WEBRTC_STATIC_ASSERT(PITCH_FRACORDER, PITCH_FRACORDER == 9);
|
||||||
|
WEBRTC_STATIC_ASSERT(PITCH_DAMPORDER, PITCH_DAMPORDER == 5);
|
||||||
|
#endif
|
||||||
|
|
||||||
void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
||||||
// Q0 if type is 2.
|
// Q0 if type is 2.
|
||||||
@ -192,8 +198,8 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
|||||||
fracoeffQQ = kIntrpCoef[frcQQ];
|
fracoeffQQ = kIntrpCoef[frcQQ];
|
||||||
|
|
||||||
// Pitch filtering.
|
// Pitch filtering.
|
||||||
PitchFilter(PITCH_SUBFRAME_LEN / kSegments, curGainQ12, indW32, sign,
|
WebRtcIsacfix_PitchFilterCore(PITCH_SUBFRAME_LEN / kSegments, curGainQ12,
|
||||||
inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
indW32, sign, inystateQQ, ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,7 +212,7 @@ void WebRtcIsacfix_PitchFilter(WebRtc_Word16* indatQQ, // Q10 if type is 1 or 4,
|
|||||||
|
|
||||||
if (type == 2) {
|
if (type == 2) {
|
||||||
// Filter look-ahead segment.
|
// Filter look-ahead segment.
|
||||||
PitchFilter(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
|
WebRtcIsacfix_PitchFilterCore(QLOOKAHEAD, curGainQ12, indW32, 1, inystateQQ,
|
||||||
ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
ubufQQ, fracoeffQQ, indatQQ, outdatQQ, &ind);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,147 @@
|
|||||||
|
@
|
||||||
|
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
|
@
|
||||||
|
@ Use of this source code is governed by a BSD-style license
|
||||||
|
@ that can be found in the LICENSE file in the root of the source
|
||||||
|
@ tree. An additional intellectual property rights grant can be found
|
||||||
|
@ in the file PATENTS. All contributing project authors may
|
||||||
|
@ be found in the AUTHORS file in the root of the source tree.
|
||||||
|
@
|
||||||
|
|
||||||
|
@ Contains the core loop routine for the pitch filter function in iSAC,
|
||||||
|
@ optimized for ARMv7 platforms.
|
||||||
|
@
|
||||||
|
@ Output is bit-exact with the reference C code in pitch_filter.c.
|
||||||
|
|
||||||
|
#include "settings.h"
|
||||||
|
|
||||||
|
.arch armv6
|
||||||
|
.align 2
|
||||||
|
.global WebRtcIsacfix_PitchFilterCore
|
||||||
|
|
||||||
|
|
||||||
|
@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
|
||||||
|
@ WebRtc_Word16 gain,
|
||||||
|
@ int index,
|
||||||
|
@ WebRtc_Word16 sign,
|
||||||
|
@ WebRtc_Word16* inputState,
|
||||||
|
@ WebRtc_Word16* outputBuf2,
|
||||||
|
@ const WebRtc_Word16* coefficient,
|
||||||
|
@ WebRtc_Word16* inputBuf,
|
||||||
|
@ WebRtc_Word16* outputBuf,
|
||||||
|
@ int* index2) {
|
||||||
|
|
||||||
|
WebRtcIsacfix_PitchFilterCore:
|
||||||
|
.fnstart
|
||||||
|
push {r4-r11}
|
||||||
|
sub sp, #8
|
||||||
|
|
||||||
|
str r0, [sp] @ loopNumber
|
||||||
|
str r3, [sp, #4] @ sign
|
||||||
|
ldr r3, [sp, #44] @ outputBuf2
|
||||||
|
ldr r6, [sp, #60] @ index2
|
||||||
|
ldr r7, [r6] @ *index2
|
||||||
|
ldr r8, [sp, #52] @ inputBuf
|
||||||
|
ldr r12, [sp, #56] @ outputBuf
|
||||||
|
|
||||||
|
add r4, r7, r0
|
||||||
|
str r4, [r6] @ Store return value to index2.
|
||||||
|
|
||||||
|
mov r10, r7, asl #1
|
||||||
|
add r12, r10 @ &outputBuf[*index2]
|
||||||
|
add r8, r10 @ &inputBuf[*index2]
|
||||||
|
|
||||||
|
add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
|
||||||
|
add r6, r3, r4, lsl #1 @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
|
||||||
|
sub r4, r2 @ r2: index
|
||||||
|
sub r4, #2 @ *index2 + PITCH_BUFFSIZE - index - 2
|
||||||
|
add r3, r4, lsl #1 @ &ubufQQpos2[*index2]
|
||||||
|
ldr r9, [sp, #48] @ coefficient
|
||||||
|
|
||||||
|
LOOP:
|
||||||
|
@ Usage of registers in the loop:
|
||||||
|
@ r0: loop counter
|
||||||
|
@ r1: gain
|
||||||
|
@ r2: tmpW32
|
||||||
|
@ r3: &ubufQQpos2[]
|
||||||
|
@ r6: &outputBuf2[]
|
||||||
|
@ r8: &inputBuf[]
|
||||||
|
@ r9: &coefficient[]
|
||||||
|
@ r12: &outputBuf[]
|
||||||
|
@ r4, r5, r7, r10, r11: scratch
|
||||||
|
|
||||||
|
@ Filter to get fractional pitch.
|
||||||
|
@ The pitch filter loop here is unrolled with 9 multiplications.
|
||||||
|
pld [r3]
|
||||||
|
ldr r10, [r3], #4 @ ubufQQpos2[*index2 + 0, *index2 + 1]
|
||||||
|
ldr r4, [r9], #4 @ coefficient[0, 1]
|
||||||
|
ldr r11, [r3], #4
|
||||||
|
ldr r5, [r9], #4
|
||||||
|
smuad r2, r10, r4
|
||||||
|
smlad r2, r11, r5, r2
|
||||||
|
|
||||||
|
ldr r10, [r3], #4
|
||||||
|
ldr r4, [r9], #4
|
||||||
|
ldr r11, [r3], #4
|
||||||
|
ldr r5, [r9], #4
|
||||||
|
smlad r2, r10, r4, r2
|
||||||
|
ldrh r10, [r3], #-14 @ r3 back to &ubufQQpos2[*index2].
|
||||||
|
ldrh r4, [r9], #-16 @ r9 back to &coefficient[0].
|
||||||
|
smlad r2, r11, r5, r2
|
||||||
|
smlabb r2, r10, r4, r2
|
||||||
|
|
||||||
|
@ Saturate to avoid overflow in tmpW16.
|
||||||
|
asr r2, #1
|
||||||
|
add r4, r2, #0x1000
|
||||||
|
ssat r7, #16, r4, asr #13
|
||||||
|
|
||||||
|
@ Shift low pass filter state, and execute the low pass filter.
|
||||||
|
@ The memmove() and the low pass filter loop are unrolled and mixed.
|
||||||
|
smulbb r5, r1, r7
|
||||||
|
add r7, r5, #0x800
|
||||||
|
asr r7, #12 @ Get the value for inputState[0].
|
||||||
|
ldr r11, [sp, #40] @ inputState
|
||||||
|
pld [r11]
|
||||||
|
adr r10, kDampFilter
|
||||||
|
ldrsh r4, [r10], #2 @ kDampFilter[0]
|
||||||
|
mul r2, r7, r4
|
||||||
|
ldr r4, [r11] @ inputState[0, 1], before shift.
|
||||||
|
strh r7, [r11] @ inputState[0], after shift.
|
||||||
|
ldr r5, [r11, #4] @ inputState[2, 3], before shift.
|
||||||
|
ldr r7, [r10], #4 @ kDampFilter[1, 2]
|
||||||
|
ldr r10, [r10] @ kDampFilter[3, 4]
|
||||||
|
str r4, [r11, #2] @ inputState[1, 2], after shift.
|
||||||
|
str r5, [r11, #6] @ inputState[3, 4], after shift.
|
||||||
|
smlad r2, r4, r7, r2
|
||||||
|
smlad r2, r5, r10, r2
|
||||||
|
|
||||||
|
@ Saturate to avoid overflow.
|
||||||
|
@ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
|
||||||
|
@ to avoid overflow in the next saturation step.
|
||||||
|
asr r2, #1
|
||||||
|
add r10, r2, #0x2000
|
||||||
|
ssat r10, #16, r10, asr #14
|
||||||
|
|
||||||
|
@ Subtract from input and update buffer.
|
||||||
|
ldr r11, [sp, #4] @ sign
|
||||||
|
ldrsh r4, [r8]
|
||||||
|
ldrsh r7, [r8], #2 @ inputBuf[*index2]
|
||||||
|
smulbb r5, r11, r10
|
||||||
|
subs r0, #1
|
||||||
|
sub r4, r5
|
||||||
|
ssat r2, #16, r4
|
||||||
|
strh r2, [r12], #2 @ outputBuf[*index2]
|
||||||
|
|
||||||
|
add r2, r7
|
||||||
|
ssat r2, #16, r2
|
||||||
|
strh r2, [r6], #2 @ outputBuf2[*index2 + PITCH_BUFFSIZE]
|
||||||
|
bgt LOOP
|
||||||
|
|
||||||
|
add sp, #8
|
||||||
|
pop {r4-r11}
|
||||||
|
bx lr
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
kDampFilter:
|
||||||
|
.short -2294, 8192, 20972, 8192, -2294
|
Loading…
x
Reference in New Issue
Block a user