In spl, introduced function WebRtcSpl_SatW32ToW16(), and changed file resample_by_2.c, both for optimization on ARMv7.
Review URL: http://webrtc-codereview.appspot.com/140010 git-svn-id: http://webrtc.googlecode.com/svn/trunk@649 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
e185e9f68a
commit
961885a8bb
@ -1659,6 +1659,17 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
|
||||
// - out_data : Super-wideband speech signal, 0-16 kHz
|
||||
//
|
||||
|
||||
// WebRtc_Word16 WebRtcSpl_SatW32ToW16(...)
|
||||
//
|
||||
// This function saturates a 32-bit word into a 16-bit word.
|
||||
//
|
||||
// Input:
|
||||
// - value32 : The value of a 32-bit word.
|
||||
//
|
||||
// Output:
|
||||
// - out16 : the saturated 16-bit word.
|
||||
//
|
||||
|
||||
// WebRtc_Word16 WebRtcSpl_get_version(...)
|
||||
//
|
||||
// This function gives the version string of the Signal Processing Library.
|
||||
|
@ -19,16 +19,20 @@
|
||||
#include "spl_inl_armv7.h"
|
||||
#else
|
||||
|
||||
// Saturates a 32-bit value to the signed 16-bit range.
//
// Input:
//      - value32 : The 32-bit value to saturate.
//
// Return value   : 32767 if value32 is above 32767, -32768 if value32 is
//                  below -32768, otherwise value32 narrowed to 16 bits.
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }

  // In range: the narrowing conversion keeps the value unchanged.
  return (WebRtc_Word16) value32;
}
|
||||
|
||||
// Adds two 16-bit values with saturation.
//
// Input:
//      - a : First 16-bit operand.
//      - b : Second 16-bit operand.
//
// Return value : a + b, clamped to the signed 16-bit range.
static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a,
                                                  WebRtc_Word16 b) {
  // The sum is formed in 32 bits, where two 16-bit operands cannot overflow,
  // and the shared helper performs the clamp.
  return WebRtcSpl_SatW32ToW16((WebRtc_Word32) a + (WebRtc_Word32) b);
}
|
||||
|
||||
static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1,
|
||||
@ -54,24 +58,7 @@ static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1,
|
||||
|
||||
// Subtracts two 16-bit values with saturation.
//
// Input:
//      - var1 : The 16-bit minuend.
//      - var2 : The 16-bit subtrahend.
//
// Return value : var1 - var2, clamped to the signed 16-bit range.
static __inline WebRtc_Word16 WebRtcSpl_SubSatW16(WebRtc_Word16 var1,
                                                  WebRtc_Word16 var2) {
  // The difference is formed in 32 bits, where two 16-bit operands cannot
  // overflow, and the shared helper performs the clamp.
  return WebRtcSpl_SatW32ToW16((WebRtc_Word32) var1 - (WebRtc_Word32) var2);
}
|
||||
|
||||
static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1,
|
||||
|
@ -119,4 +119,11 @@ static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) {
|
||||
return tmp - 17;
|
||||
}
|
||||
|
||||
// Saturates a 32-bit value to the signed 16-bit range with a single ARM SSAT
// instruction.
//
// Input:
//      - value32 : The 32-bit value to saturate.
//
// Return value   : value32 clamped to [-32768, 32767].
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
  // SSAT writes a full 32-bit register, so the asm output operand is 32 bits
  // wide and is narrowed afterwards.
  WebRtc_Word32 out32;

  // Plain %0/%1 are the documented operand placeholders for extended asm.
  __asm__("ssat %0, #16, %1" : "=r"(out32) : "r"(value32));

  return (WebRtc_Word16) out32;
}
|
||||
#endif // WEBRTC_SPL_SPL_INL_ARMV7_H_
|
||||
|
@ -52,7 +52,7 @@ int WebRtcSpl_DownsampleFast(WebRtc_Word16 *in_ptr, WebRtc_Word16 in_length,
|
||||
|
||||
// If output is higher than 32767, saturate it. Same with the negative side (lower than -32768).
|
||||
|
||||
*downsampled_ptr++ = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, o, -32768);
|
||||
*downsampled_ptr++ = WebRtcSpl_SatW32ToW16(o);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -17,154 +17,165 @@
|
||||
|
||||
#include "signal_processing_library.h"
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||
|
||||
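// Allpass filter coefficients. Entries that are passed to MUL_ACCUM_2 are
// stored pre-shifted left by 15 bits; entries passed to MUL_ACCUM_1 are stored
// unshifted.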
|
||||
static const WebRtc_UWord32 kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
|
||||
static const WebRtc_UWord32 kResampleAllpass2[3] =
|
||||
{12199, 37471 << 15, 60255 << 15};
|
||||
|
||||
// Multiply two 32-bit values and accumulate to another input value.
|
||||
// Return: state + ((diff * tbl_value) >> 16)
|
||||
|
||||
static __inline WebRtc_Word32 MUL_ACCUM_1(WebRtc_Word32 tbl_value,
|
||||
WebRtc_Word32 diff,
|
||||
WebRtc_Word32 state) {
|
||||
WebRtc_Word32 result;
|
||||
__asm__("smlawb %r0, %r1, %r2, %r3": "=r"(result): "r"(diff),
|
||||
"r"(tbl_value), "r"(state));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Multiply two 32-bit values and accumulate to another input value.
|
||||
// Return: Return: state + (((diff << 1) * tbl_value) >> 32)
|
||||
//
|
||||
// The reason to introduce this function is that, in case we can't use smlawb
|
||||
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
|
||||
// smmla to save some cycles.
|
||||
|
||||
static __inline WebRtc_Word32 MUL_ACCUM_2(WebRtc_Word32 tbl_value,
|
||||
WebRtc_Word32 diff,
|
||||
WebRtc_Word32 state) {
|
||||
WebRtc_Word32 result;
|
||||
__asm__("smmla %r0, %r1, %r2, %r3": "=r"(result): "r"(diff << 1),
|
||||
"r"(tbl_value), "r"(state));
|
||||
return result;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// allpass filter coefficients.
|
||||
static const WebRtc_UWord16 kResampleAllpass1[3] = {3284, 24441, 49528};
|
||||
static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255};
|
||||
|
||||
// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
|
||||
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
|
||||
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
|
||||
|
||||
#endif // WEBRTC_ARCH_ARM_V7A
|
||||
|
||||
|
||||
// decimator
|
||||
// Downsamples a signal by a factor of two using two three-stage allpass
// filter chains whose outputs are averaged.
//
// Input:
//      - in        : Input samples.
//      - len       : Number of input samples; the loop runs len >> 1 times
//                    and consumes two input samples per iteration.
//
// Input/Output:
//      - filtState : Eight filter state words, read on entry and written back
//                    on exit.
//
// Output:
//      - out       : Output samples, one per loop iteration.
void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len,
                             WebRtc_Word16* out, WebRtc_Word32* filtState) {
  WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
  WebRtc_Word16 i;

  register WebRtc_Word32 state0 = filtState[0];
  register WebRtc_Word32 state1 = filtState[1];
  register WebRtc_Word32 state2 = filtState[2];
  register WebRtc_Word32 state3 = filtState[3];
  register WebRtc_Word32 state4 = filtState[4];
  register WebRtc_Word32 state5 = filtState[5];
  register WebRtc_Word32 state6 = filtState[6];
  register WebRtc_Word32 state7 = filtState[7];

  for (i = (len >> 1); i > 0; i--) {
    // lower allpass filter
    // Scale by 2^10 with a multiplication: left-shifting a negative sample
    // would be undefined behavior.
    in32 = (WebRtc_Word32)(*in++) * (1 << 10);
    diff = in32 - state1;
    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
    state0 = in32;
    diff = tmp1 - state2;
    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
    state1 = tmp1;
    diff = tmp2 - state3;
    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
    state2 = tmp2;

    // upper allpass filter
    in32 = (WebRtc_Word32)(*in++) * (1 << 10);
    diff = in32 - state5;
    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
    state4 = in32;
    diff = tmp1 - state6;
    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
    state5 = tmp1;
    diff = tmp2 - state7;
    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
    state6 = tmp2;

    // add two allpass outputs, divide by two and round
    out32 = (state3 + state7 + 1024) >> 11;

    // limit amplitude to prevent wrap-around, and write to output array
    *out++ = WebRtcSpl_SatW32ToW16(out32);
  }

  filtState[0] = state0;
  filtState[1] = state1;
  filtState[2] = state2;
  filtState[3] = state3;
  filtState[4] = state4;
  filtState[5] = state5;
  filtState[6] = state6;
  filtState[7] = state7;
}
|
||||
|
||||
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, WebRtc_Word16* out,
|
||||
WebRtc_Word32* filtState)
|
||||
{
|
||||
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
|
||||
WebRtc_Word16 i;
|
||||
|
||||
register WebRtc_Word32 state0 = filtState[0];
|
||||
register WebRtc_Word32 state1 = filtState[1];
|
||||
register WebRtc_Word32 state2 = filtState[2];
|
||||
register WebRtc_Word32 state3 = filtState[3];
|
||||
register WebRtc_Word32 state4 = filtState[4];
|
||||
register WebRtc_Word32 state5 = filtState[5];
|
||||
register WebRtc_Word32 state6 = filtState[6];
|
||||
register WebRtc_Word32 state7 = filtState[7];
|
||||
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len,
|
||||
WebRtc_Word16* out, WebRtc_Word32* filtState) {
|
||||
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
|
||||
WebRtc_Word16 i;
|
||||
|
||||
for (i = len; i > 0; i--)
|
||||
{
|
||||
// lower allpass filter
|
||||
in32 = (WebRtc_Word32)(*in++) << 10;
|
||||
diff = in32 - state1;
|
||||
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[0], diff, state0);
|
||||
state0 = in32;
|
||||
diff = tmp1 - state2;
|
||||
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[1], diff, state1);
|
||||
state1 = tmp1;
|
||||
diff = tmp2 - state3;
|
||||
state3 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[2], diff, state2);
|
||||
state2 = tmp2;
|
||||
register WebRtc_Word32 state0 = filtState[0];
|
||||
register WebRtc_Word32 state1 = filtState[1];
|
||||
register WebRtc_Word32 state2 = filtState[2];
|
||||
register WebRtc_Word32 state3 = filtState[3];
|
||||
register WebRtc_Word32 state4 = filtState[4];
|
||||
register WebRtc_Word32 state5 = filtState[5];
|
||||
register WebRtc_Word32 state6 = filtState[6];
|
||||
register WebRtc_Word32 state7 = filtState[7];
|
||||
|
||||
// round; limit amplitude to prevent wrap-around; write to output array
|
||||
out32 = (state3 + 512) >> 10;
|
||||
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||
__asm__("ssat %r0, #16, %r1":"=r"(*out): "r"(out32));
|
||||
out++;
|
||||
#else
|
||||
if (out32 > 32767)
|
||||
*out++ = 32767;
|
||||
else if (out32 < -32768)
|
||||
*out++ = -32768;
|
||||
else
|
||||
*out++ = (WebRtc_Word16)out32;
|
||||
#endif
|
||||
for (i = len; i > 0; i--) {
|
||||
// lower allpass filter
|
||||
in32 = (WebRtc_Word32)(*in++) << 10;
|
||||
diff = in32 - state1;
|
||||
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
|
||||
state0 = in32;
|
||||
diff = tmp1 - state2;
|
||||
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
|
||||
state1 = tmp1;
|
||||
diff = tmp2 - state3;
|
||||
state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
|
||||
state2 = tmp2;
|
||||
|
||||
// upper allpass filter
|
||||
diff = in32 - state5;
|
||||
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[0], diff, state4);
|
||||
state4 = in32;
|
||||
diff = tmp1 - state6;
|
||||
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[1], diff, state5);
|
||||
state5 = tmp1;
|
||||
diff = tmp2 - state7;
|
||||
state7 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[2], diff, state6);
|
||||
state6 = tmp2;
|
||||
// round; limit amplitude to prevent wrap-around; write to output array
|
||||
out32 = (state3 + 512) >> 10;
|
||||
*out++ = WebRtcSpl_SatW32ToW16(out32);
|
||||
|
||||
// round; limit amplitude to prevent wrap-around; write to output array
|
||||
out32 = (state7 + 512) >> 10;
|
||||
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||
__asm__("ssat %r0, #16, %r1":"=r"(*out): "r"(out32));
|
||||
out++;
|
||||
#else
|
||||
if (out32 > 32767)
|
||||
*out++ = 32767;
|
||||
else if (out32 < -32768)
|
||||
*out++ = -32768;
|
||||
else
|
||||
*out++ = (WebRtc_Word16)out32;
|
||||
#endif
|
||||
}
|
||||
|
||||
filtState[0] = state0;
|
||||
filtState[1] = state1;
|
||||
filtState[2] = state2;
|
||||
filtState[3] = state3;
|
||||
filtState[4] = state4;
|
||||
filtState[5] = state5;
|
||||
filtState[6] = state6;
|
||||
filtState[7] = state7;
|
||||
// upper allpass filter
|
||||
diff = in32 - state5;
|
||||
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
|
||||
state4 = in32;
|
||||
diff = tmp1 - state6;
|
||||
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
|
||||
state5 = tmp1;
|
||||
diff = tmp2 - state7;
|
||||
state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
|
||||
state6 = tmp2;
|
||||
|
||||
// round; limit amplitude to prevent wrap-around; write to output array
|
||||
out32 = (state7 + 512) >> 10;
|
||||
*out++ = WebRtcSpl_SatW32ToW16(out32);
|
||||
}
|
||||
|
||||
filtState[0] = state0;
|
||||
filtState[1] = state1;
|
||||
filtState[2] = state2;
|
||||
filtState[3] = state3;
|
||||
filtState[4] = state4;
|
||||
filtState[5] = state5;
|
||||
filtState[6] = state6;
|
||||
filtState[7] = state7;
|
||||
}
|
||||
|
@ -147,13 +147,11 @@ void WebRtcSpl_AnalysisQMF(const WebRtc_Word16* in_data, WebRtc_Word16* low_band
|
||||
{
|
||||
tmp = filter1[i] + filter2[i] + 1024;
|
||||
tmp = WEBRTC_SPL_RSHIFT_W32(tmp, 11);
|
||||
low_band[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
||||
tmp, WEBRTC_SPL_WORD16_MIN);
|
||||
low_band[i] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
|
||||
tmp = filter1[i] - filter2[i] + 1024;
|
||||
tmp = WEBRTC_SPL_RSHIFT_W32(tmp, 11);
|
||||
high_band[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
||||
tmp, WEBRTC_SPL_WORD16_MIN);
|
||||
high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
@ -191,10 +189,10 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, const WebRtc_Word16*
|
||||
for (i = 0, k = 0; i < kBandFrameLength; i++)
|
||||
{
|
||||
tmp = WEBRTC_SPL_RSHIFT_W32(filter2[i] + 512, 10);
|
||||
out_data[k++] = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmp, -32768);
|
||||
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
|
||||
tmp = WEBRTC_SPL_RSHIFT_W32(filter1[i] + 512, 10);
|
||||
out_data[k++] = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmp, -32768);
|
||||
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -125,7 +125,7 @@ void WebRtcSpl_ScaleVectorWithSat(G_CONST WebRtc_Word16 *in_vector, WebRtc_Word1
|
||||
for (i = 0; i < in_vector_length; i++)
|
||||
{
|
||||
tmpW32 = WEBRTC_SPL_MUL_16_16_RSFT(*inptr++, gain, right_shifts);
|
||||
( *outptr++) = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmpW32, -32768);
|
||||
(*outptr++) = WebRtcSpl_SatW32ToW16(tmpW32);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user