Optimization of resampling by 2 for ARMv7, in spl.

Review URL: http://webrtc-codereview.appspot.com/92015

git-svn-id: http://webrtc.googlecode.com/svn/trunk@327 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@google.com 2011-08-08 15:40:10 +00:00
parent f67f197049
commit 881103225d
2 changed files with 97 additions and 43 deletions

View File

@ -23,13 +23,16 @@ ifeq ($(TARGET_ARCH),arm)
MY_WEBRTC_COMMON_DEFS += \
'-DWEBRTC_ARM_INLINE_CALLS' \
'-DWEBRTC_ARCH_ARM'
# TODO: test if the code under next two MACROs works with generic GCC compilers
# TODO(kma): test if the code under next two macros works with generic GCC compilers
ifeq ($(ARCH_ARM_HAVE_NEON),true)
MY_WEBRTC_COMMON_DEFS += \
'-DWEBRTC_ANDROID_ARMV7A_NEON'
else ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
'-DWEBRTC_ARCH_ARM_NEON'
endif
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
MY_WEBRTC_COMMON_DEFS += \
'-DWEBRTC_ANDROID_ARMV7A'
'-DWEBRTC_ARCH_ARM_V7A'
endif
else ifeq ($(TARGET_ARCH),x86)
MY_WEBRTC_COMMON_DEFS += \

View File

@ -36,43 +36,66 @@ void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len,
outptr = out; // output array (of length len/2)
state = filtState; // filter state array; length = 8
register WebRtc_Word32 state0 = state[0];
register WebRtc_Word32 state1 = state[1];
register WebRtc_Word32 state2 = state[2];
register WebRtc_Word32 state3 = state[3];
register WebRtc_Word32 state4 = state[4];
register WebRtc_Word32 state5 = state[5];
register WebRtc_Word32 state6 = state[6];
register WebRtc_Word32 state7 = state[7];
for (i = (len >> 1); i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*inptr++) << 10;
diff = in32 - state[1];
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state[0] );
state[0] = in32;
diff = tmp1 - state[2];
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state[1] );
state[1] = tmp1;
diff = tmp2 - state[3];
state[3] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state[2] );
state[2] = tmp2;
diff = in32 - state1;
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state0 );
state0 = in32;
diff = tmp1 - state2;
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state1 );
state1 = tmp1;
diff = tmp2 - state3;
state3 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state2 );
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*inptr++) << 10;
diff = in32 - state[5];
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state[4] );
state[4] = in32;
diff = tmp1 - state[6];
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state[5] );
state[5] = tmp1;
diff = tmp2 - state[7];
state[7] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state[6] );
state[6] = tmp2;
diff = in32 - state5;
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state4 );
state4 = in32;
diff = tmp1 - state6;
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state5 );
state5 = tmp1;
diff = tmp2 - state7;
state7 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state6 );
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state[3] + state[7] + 1024) >> 11;
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1" : "=r"(*outptr) : "r"(out32));
outptr++;
#else
if (out32 > 32767)
*outptr++ = 32767;
else if (out32 < -32768)
*outptr++ = -32768;
else
*outptr++ = (WebRtc_Word16)out32;
#endif
}
state[0]=state0;
state[1]=state1;
state[2]=state2;
state[3]=state3;
state[4]=state4;
state[5]=state5;
state[6]=state6;
state[7]=state7;
}
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, WebRtc_Word16* out,
@ -89,47 +112,75 @@ void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, WebRtc_Wo
outptr = out; // output array (of length len*2)
state = filtState; // filter state array; length = 8
register WebRtc_Word32 state0 = state[0];
register WebRtc_Word32 state1 = state[1];
register WebRtc_Word32 state2 = state[2];
register WebRtc_Word32 state3 = state[3];
register WebRtc_Word32 state4 = state[4];
register WebRtc_Word32 state5 = state[5];
register WebRtc_Word32 state6 = state[6];
register WebRtc_Word32 state7 = state[7];
for (i = len; i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*inptr++) << 10;
diff = in32 - state[1];
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state[0] );
state[0] = in32;
diff = tmp1 - state[2];
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state[1] );
state[1] = tmp1;
diff = tmp2 - state[3];
state[3] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state[2] );
state[2] = tmp2;
diff = in32 - state1;
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state0 );
state0 = in32;
diff = tmp1 - state2;
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state1 );
state1 = tmp1;
diff = tmp2 - state3;
state3 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state2 );
state2 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state[3] + 512) >> 10;
out32 = (state3 + 512) >> 10;
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1":"=r"(*outptr): "r"(out32));
outptr++;
#else
if (out32 > 32767)
*outptr++ = 32767;
else if (out32 < -32768)
*outptr++ = -32768;
else
*outptr++ = (WebRtc_Word16)out32;
#endif
// upper allpass filter
diff = in32 - state[5];
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state[4] );
state[4] = in32;
diff = tmp1 - state[6];
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state[5] );
state[5] = tmp1;
diff = tmp2 - state[7];
state[7] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state[6] );
state[6] = tmp2;
diff = in32 - state5;
tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state4 );
state4 = in32;
diff = tmp1 - state6;
tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state5 );
state5 = tmp1;
diff = tmp2 - state7;
state7 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state6 );
state6 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state[7] + 512) >> 10;
out32 = (state7 + 512) >> 10;
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1":"=r"(*outptr): "r"(out32));
outptr++;
#else
if (out32 > 32767)
*outptr++ = 32767;
else if (out32 < -32768)
*outptr++ = -32768;
else
*outptr++ = (WebRtc_Word16)out32;
#endif
}
state[0]=state0;
state[1]=state1;
state[2]=state2;
state[3]=state3;
state[4]=state4;
state[5]=state5;
state[6]=state6;
state[7]=state7;
}