From 72f8a6d77bf0d9815ab0657cbf7c419d35bf93db Mon Sep 17 00:00:00 2001 From: "kma@webrtc.org" Date: Mon, 9 Jul 2012 23:27:02 +0000 Subject: [PATCH] Optimized PCorr2Q32() in iSAC with intrinsics in ARM Neon platform. Review URL: https://webrtc-codereview.appspot.com/634004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2497 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/iSAC/fix/source/pitch_estimator.c | 45 ++++++++++++++++--- .../codecs/iSAC/fix/source/pitch_estimator.h | 10 ++--- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.c b/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.c index 1702098ed..d95c19b3d 100644 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.c +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.c @@ -15,7 +15,9 @@ * */ -#include +#ifdef WEBRTC_ARCH_ARM_NEON +#include +#endif #include "signal_processing_library.h" #include "pitch_estimator.h" @@ -201,15 +203,44 @@ static void PCorr2Q32(const WebRtc_Word16 *in, WebRtc_Word32 *logcorQ8) inptr = &in[k]; ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) in[k-1],(WebRtc_Word16) in[k-1], scaling); ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) in[PITCH_CORR_LEN2 + k - 1],(WebRtc_Word16) in[PITCH_CORR_LEN2 + k - 1], scaling); - csum32 = 0; - prod32 = WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) x[0],(WebRtc_Word16) inptr[0], scaling); - for (n = 1; n < PITCH_CORR_LEN2; n++) { - csum32 += prod32; - prod32 = WEBRTC_SPL_MUL_16_16_RSFT( (WebRtc_Word16) x[n],(WebRtc_Word16) inptr[n], scaling); +#ifdef WEBRTC_ARCH_ARM_NEON + { + int32_t vbuff[4]; + int32x4_t int_32x4_sum = vmovq_n_s32(0); + // Can't shift a Neon register to right with a non-constant shift value. + int32x4_t int_32x4_scale = vdupq_n_s32(-scaling); + // Assert a condition used in loop unrolling at compile-time. 
+ WEBRTC_STATIC_ASSERT(PITCH_CORR_LEN2, PITCH_CORR_LEN2 %4 == 0); + + for (n = 0; n < PITCH_CORR_LEN2; n += 4) { + int16x4_t int_16x4_x = vld1_s16(&x[n]); + int16x4_t int_16x4_in = vld1_s16(&inptr[n]); + int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in); + int_32x4 = vshlq_s32(int_32x4, int_32x4_scale); + int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4); + } + + // Use vector store to avoid long stall from data transferring + // from vector to general register. + vst1q_s32(vbuff, int_32x4_sum); + csum32 = vbuff[0] + vbuff[1]; + csum32 += vbuff[2]; + csum32 += vbuff[3]; } +#else + csum32 = 0; + if(scaling == 0) { + for (n = 0; n < PITCH_CORR_LEN2; n++) { + csum32 += x[n] * inptr[n]; + } + } else { + for (n = 0; n < PITCH_CORR_LEN2; n++) { + csum32 += (x[n] * inptr[n]) >> scaling; + } + } +#endif - csum32 += prod32; logcorQ8--; lys=Log2Q8((WebRtc_UWord32)ysum32); // Q8 diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.h b/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.h index afdc9785d..8ba034c3b 100644 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.h +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/pitch_estimator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -20,7 +20,10 @@ #include "structs.h" - +// TODO(andrew): put this into general WebRTC so other modules can use it. +// Define a compile-time assertion. +#define WEBRTC_STATIC_ASSERT(name, boolean_cond) \ + static char const static_assert_##name[(boolean_cond) ? 
1 : -1] = {'!'} void WebRtcIsacfix_PitchAnalysis(const WebRtc_Word16 *in, /* PITCH_FRAME_LEN samples */ WebRtc_Word16 *outQ0, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ @@ -28,7 +31,6 @@ void WebRtcIsacfix_PitchAnalysis(const WebRtc_Word16 *in, /* PITCH WebRtc_Word16 *lagsQ7, WebRtc_Word16 *PitchGains_Q12); - void WebRtcIsacfix_InitialPitch(const WebRtc_Word16 *in, PitchAnalysisStruct *State, WebRtc_Word16 *qlags); @@ -45,8 +47,6 @@ void WebRtcIsacfix_PitchFilterGains(const WebRtc_Word16 *indatQ0, WebRtc_Word16 *lagsQ7, WebRtc_Word16 *gainsQ12); - - void WebRtcIsacfix_DecimateAllpass32(const WebRtc_Word16 *in, WebRtc_Word32 *state_in, /* array of size: 2*ALLPASSSECTIONS+1 */ WebRtc_Word16 N, /* number of input samples */