Optimized PCorr2Q32() in iSAC with intrinsics on the ARM NEON platform.
Review URL: https://webrtc-codereview.appspot.com/634004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2497 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -15,7 +15,9 @@ | |||||||
|  * |  * | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| #include <string.h> | #ifdef WEBRTC_ARCH_ARM_NEON | ||||||
|  | #include <arm_neon.h> | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #include "signal_processing_library.h" | #include "signal_processing_library.h" | ||||||
| #include "pitch_estimator.h" | #include "pitch_estimator.h" | ||||||
| @@ -201,15 +203,44 @@ static void PCorr2Q32(const WebRtc_Word16 *in, WebRtc_Word32 *logcorQ8) | |||||||
|     inptr = &in[k]; |     inptr = &in[k]; | ||||||
|     ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) in[k-1],(WebRtc_Word16) in[k-1], scaling); |     ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) in[k-1],(WebRtc_Word16) in[k-1], scaling); | ||||||
|     ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) in[PITCH_CORR_LEN2 + k - 1],(WebRtc_Word16) in[PITCH_CORR_LEN2 + k - 1], scaling); |     ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) in[PITCH_CORR_LEN2 + k - 1],(WebRtc_Word16) in[PITCH_CORR_LEN2 + k - 1], scaling); | ||||||
|     csum32 = 0; |  | ||||||
|     prod32 = WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) x[0],(WebRtc_Word16) inptr[0], scaling); |  | ||||||
|  |  | ||||||
|     for (n = 1; n < PITCH_CORR_LEN2; n++) { | #ifdef WEBRTC_ARCH_ARM_NEON | ||||||
|       csum32 += prod32; |     { | ||||||
|       prod32 = WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) x[n],(WebRtc_Word16) inptr[n], scaling); |       int32_t vbuff[4]; | ||||||
|  |       int32x4_t int_32x4_sum = vmovq_n_s32(0); | ||||||
|  |       // NEON can't right-shift a register by a non-constant amount, so shift left by the negated value instead. | ||||||
|  |       int32x4_t int_32x4_scale = vdupq_n_s32(-scaling); | ||||||
|  |       // Assert a condition used in loop unrolling at compile-time. | ||||||
|  |       WEBRTC_STATIC_ASSERT(PITCH_CORR_LEN2, PITCH_CORR_LEN2 %4 == 0); | ||||||
|  |  | ||||||
|  |       for (n = 0; n < PITCH_CORR_LEN2; n += 4) { | ||||||
|  |         int16x4_t int_16x4_x = vld1_s16(&x[n]); | ||||||
|  |         int16x4_t int_16x4_in = vld1_s16(&inptr[n]); | ||||||
|  |         int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in); | ||||||
|  |         int_32x4 = vshlq_s32(int_32x4, int_32x4_scale); | ||||||
|  |         int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|     csum32 += prod32; |       // Use vector store to avoid long stall from data transferring | ||||||
|  |       // from vector to general register. | ||||||
|  |       vst1q_s32(vbuff, int_32x4_sum); | ||||||
|  |       csum32 = vbuff[0] + vbuff[1]; | ||||||
|  |       csum32 += vbuff[2]; | ||||||
|  |       csum32 += vbuff[3]; | ||||||
|  |     } | ||||||
|  | #else | ||||||
|  |     csum32 = 0; | ||||||
|  |     if(scaling == 0) { | ||||||
|  |       for (n = 0; n < PITCH_CORR_LEN2; n++) { | ||||||
|  |         csum32 += x[n] * inptr[n]; | ||||||
|  |       } | ||||||
|  |     } else { | ||||||
|  |       for (n = 0; n < PITCH_CORR_LEN2; n++) { | ||||||
|  |         csum32 += (x[n] * inptr[n]) >> scaling; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|     logcorQ8--; |     logcorQ8--; | ||||||
|  |  | ||||||
|     lys=Log2Q8((WebRtc_UWord32)ysum32); // Q8 |     lys=Log2Q8((WebRtc_UWord32)ysum32); // Q8 | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| /* | /* | ||||||
|  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||||
|  * |  * | ||||||
|  *  Use of this source code is governed by a BSD-style license |  *  Use of this source code is governed by a BSD-style license | ||||||
|  *  that can be found in the LICENSE file in the root of the source |  *  that can be found in the LICENSE file in the root of the source | ||||||
| @@ -20,7 +20,10 @@ | |||||||
|  |  | ||||||
| #include "structs.h" | #include "structs.h" | ||||||
|  |  | ||||||
|  | // TODO(andrew): put this into general WebRTC so other modules can use it. | ||||||
|  | // Define a compile-time assertion. | ||||||
|  | #define WEBRTC_STATIC_ASSERT(name, boolean_cond) \ | ||||||
|  |   static char const static_assert_##name[(boolean_cond) ? 1 : -1] = {'!'} | ||||||
|  |  | ||||||
| void WebRtcIsacfix_PitchAnalysis(const WebRtc_Word16 *in,               /* PITCH_FRAME_LEN samples */ | void WebRtcIsacfix_PitchAnalysis(const WebRtc_Word16 *in,               /* PITCH_FRAME_LEN samples */ | ||||||
|                                  WebRtc_Word16 *outQ0,                  /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ |                                  WebRtc_Word16 *outQ0,                  /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ | ||||||
| @@ -28,7 +31,6 @@ void WebRtcIsacfix_PitchAnalysis(const WebRtc_Word16 *in,               /* PITCH | |||||||
|                                  WebRtc_Word16 *lagsQ7, |                                  WebRtc_Word16 *lagsQ7, | ||||||
|                                  WebRtc_Word16 *PitchGains_Q12); |                                  WebRtc_Word16 *PitchGains_Q12); | ||||||
|  |  | ||||||
|  |  | ||||||
| void WebRtcIsacfix_InitialPitch(const WebRtc_Word16 *in, | void WebRtcIsacfix_InitialPitch(const WebRtc_Word16 *in, | ||||||
|                                 PitchAnalysisStruct *State, |                                 PitchAnalysisStruct *State, | ||||||
|                                 WebRtc_Word16 *qlags); |                                 WebRtc_Word16 *qlags); | ||||||
| @@ -45,8 +47,6 @@ void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0, | |||||||
|                                     WebRtc_Word16 *lagsQ7, |                                     WebRtc_Word16 *lagsQ7, | ||||||
|                                     WebRtc_Word16 *gainsQ12); |                                     WebRtc_Word16 *gainsQ12); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| void WebRtcIsacfix_DecimateAllpass32(const WebRtc_Word16 *in, | void WebRtcIsacfix_DecimateAllpass32(const WebRtc_Word16 *in, | ||||||
|                                      WebRtc_Word32 *state_in,        /* array of size: 2*ALLPASSSECTIONS+1 */ |                                      WebRtc_Word32 *state_in,        /* array of size: 2*ALLPASSSECTIONS+1 */ | ||||||
|                                      WebRtc_Word16 N,                   /* number of input samples */ |                                      WebRtc_Word16 N,                   /* number of input samples */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 kma@webrtc.org
					kma@webrtc.org