Performance on ARMv7 improved by 7~8%.
Review URL: http://webrtc-codereview.appspot.com/114007
git-svn-id: http://webrtc.googlecode.com/svn/trunk@384 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
977c2966fc
commit
78dc99e2a1
@ -98,19 +98,39 @@ int WebRtcSpl_ComplexFFT(WebRtc_Word16 frfi[], int stages, int mode)
|
|||||||
wr = WebRtcSpl_kSinTable1024[j + 256];
|
wr = WebRtcSpl_kSinTable1024[j + 256];
|
||||||
wi = -WebRtcSpl_kSinTable1024[j];
|
wi = -WebRtcSpl_kSinTable1024[j];
|
||||||
|
|
||||||
|
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||||
|
WebRtc_Word32 wri;
|
||||||
|
WebRtc_Word32 frfi_r;
|
||||||
|
__asm__("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
|
||||||
|
"r"((WebRtc_Word32)wr), "r"((WebRtc_Word32)wi));
|
||||||
|
#endif
|
||||||
|
|
||||||
for (i = m; i < n; i += istep)
|
for (i = m; i < n; i += istep)
|
||||||
{
|
{
|
||||||
j = i + l;
|
j = i + l;
|
||||||
|
|
||||||
tr32 = WEBRTC_SPL_RSHIFT_W32((WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
|
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||||
- WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1]) + CFFTRND),
|
__asm__("pkhbt %0, %1, %2, lsl #16" : "=r"(frfi_r) :
|
||||||
15 - CFFTSFT);
|
"r"((WebRtc_Word32)frfi[2*j]), "r"((WebRtc_Word32)frfi[2*j +1]));
|
||||||
|
__asm__("smlsd %0, %1, %2, %3" : "=r"(tr32) :
|
||||||
|
"r"(wri), "r"(frfi_r), "r"(CFFTRND));
|
||||||
|
__asm__("smladx %0, %1, %2, %3" : "=r"(ti32) :
|
||||||
|
"r"(wri), "r"(frfi_r), "r"(CFFTRND));
|
||||||
|
|
||||||
ti32 = WEBRTC_SPL_RSHIFT_W32((WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
|
#else
|
||||||
+ WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j]) + CFFTRND), 15 - CFFTSFT);
|
tr32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
|
||||||
|
- WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1]) + CFFTRND;
|
||||||
|
|
||||||
|
ti32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
|
||||||
|
+ WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j]) + CFFTRND;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
tr32 = WEBRTC_SPL_RSHIFT_W32(tr32, 15 - CFFTSFT);
|
||||||
|
ti32 = WEBRTC_SPL_RSHIFT_W32(ti32, 15 - CFFTSFT);
|
||||||
|
|
||||||
qr32 = ((WebRtc_Word32)frfi[2 * i]) << CFFTSFT;
|
qr32 = ((WebRtc_Word32)frfi[2 * i]) << CFFTSFT;
|
||||||
qi32 = ((WebRtc_Word32)frfi[2 * i + 1]) << CFFTSFT;
|
qi32 = ((WebRtc_Word32)frfi[2 * i + 1]) << CFFTSFT;
|
||||||
|
|
||||||
frfi[2 * j] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(
|
frfi[2 * j] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(
|
||||||
(qr32 - tr32 + CFFTRND2), 1 + CFFTSFT);
|
(qr32 - tr32 + CFFTRND2), 1 + CFFTSFT);
|
||||||
frfi[2 * j + 1] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(
|
frfi[2 * j + 1] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(
|
||||||
|
@ -110,21 +110,38 @@ int WebRtcSpl_ComplexIFFT(WebRtc_Word16 frfi[], int stages, int mode)
|
|||||||
wr = WebRtcSpl_kSinTable1024[j + 256];
|
wr = WebRtcSpl_kSinTable1024[j + 256];
|
||||||
wi = WebRtcSpl_kSinTable1024[j];
|
wi = WebRtcSpl_kSinTable1024[j];
|
||||||
|
|
||||||
|
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||||
|
WebRtc_Word32 wri;
|
||||||
|
WebRtc_Word32 frfi_r;
|
||||||
|
__asm__("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
|
||||||
|
"r"((WebRtc_Word32)wr), "r"((WebRtc_Word32)wi));
|
||||||
|
#endif
|
||||||
|
|
||||||
for (i = m; i < n; i += istep)
|
for (i = m; i < n; i += istep)
|
||||||
{
|
{
|
||||||
j = i + l;
|
j = i + l;
|
||||||
|
|
||||||
tr32 = WEBRTC_SPL_RSHIFT_W32((WEBRTC_SPL_MUL_16_16_RSFT(wr, frfi[2 * j], 0)
|
#ifdef WEBRTC_ARCH_ARM_V7A
|
||||||
- WEBRTC_SPL_MUL_16_16_RSFT(wi, frfi[2 * j + 1], 0) + CIFFTRND),
|
__asm__("pkhbt %0, %1, %2, lsl #16" : "=r"(frfi_r) :
|
||||||
15 - CIFFTSFT);
|
"r"((WebRtc_Word32)frfi[2*j]), "r"((WebRtc_Word32)frfi[2*j +1]));
|
||||||
|
__asm__("smlsd %0, %1, %2, %3" : "=r"(tr32) :
|
||||||
|
"r"(wri), "r"(frfi_r), "r"(CIFFTRND));
|
||||||
|
__asm__("smladx %0, %1, %2, %3" : "=r"(ti32) :
|
||||||
|
"r"(wri), "r"(frfi_r), "r"(CIFFTRND));
|
||||||
|
#else
|
||||||
|
|
||||||
ti32 = WEBRTC_SPL_RSHIFT_W32(
|
tr32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
|
||||||
(WEBRTC_SPL_MUL_16_16_RSFT(wr, frfi[2 * j + 1], 0)
|
- WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1]) + CIFFTRND;
|
||||||
+ WEBRTC_SPL_MUL_16_16_RSFT(wi, frfi[2 * j], 0)
|
|
||||||
+ CIFFTRND), 15 - CIFFTSFT);
|
ti32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
|
||||||
|
+ WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j]) + CIFFTRND;
|
||||||
|
#endif
|
||||||
|
tr32 = WEBRTC_SPL_RSHIFT_W32(tr32, 15 - CIFFTSFT);
|
||||||
|
ti32 = WEBRTC_SPL_RSHIFT_W32(ti32, 15 - CIFFTSFT);
|
||||||
|
|
||||||
qr32 = ((WebRtc_Word32)frfi[2 * i]) << CIFFTSFT;
|
qr32 = ((WebRtc_Word32)frfi[2 * i]) << CIFFTSFT;
|
||||||
qi32 = ((WebRtc_Word32)frfi[2 * i + 1]) << CIFFTSFT;
|
qi32 = ((WebRtc_Word32)frfi[2 * i + 1]) << CIFFTSFT;
|
||||||
|
|
||||||
frfi[2 * j] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((qr32 - tr32+round2),
|
frfi[2 * j] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((qr32 - tr32+round2),
|
||||||
shift+CIFFTSFT);
|
shift+CIFFTSFT);
|
||||||
frfi[2 * j + 1] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(
|
frfi[2 * j + 1] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(
|
||||||
|
Loading…
Reference in New Issue
Block a user