Changed keyword __restrict__ to __restrict.
Review URL: http://webrtc-codereview.appspot.com/279011 git-svn-id: http://webrtc.googlecode.com/svn/trunk@978 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
3798ecb25b
commit
ced118636d
@ -131,10 +131,10 @@ void WebRtcIsacfix_NormLatticeFilterAr(WebRtc_Word16 orderCoef,
|
|||||||
WebRtc_Word16 lo_hi,
|
WebRtc_Word16 lo_hi,
|
||||||
WebRtc_Word16 *lat_outQ0);
|
WebRtc_Word16 *lat_outQ0);
|
||||||
|
|
||||||
int WebRtcIsacfix_AutocorrFix(WebRtc_Word32 *r,
|
int WebRtcIsacfix_AutocorrFix(WebRtc_Word32* __restrict r,
|
||||||
const WebRtc_Word16 *x,
|
const WebRtc_Word16* __restrict x,
|
||||||
WebRtc_Word16 N,
|
WebRtc_Word16 N,
|
||||||
WebRtc_Word16 order,
|
WebRtc_Word16 order,
|
||||||
WebRtc_Word16 *scale);
|
WebRtc_Word16* __restrict scale);
|
||||||
|
|
||||||
#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_CODEC_H_ */
|
#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_CODEC_H_ */
|
||||||
|
@ -22,50 +22,44 @@
|
|||||||
#include "lpc_masking_model.h"
|
#include "lpc_masking_model.h"
|
||||||
#include "codec.h"
|
#include "codec.h"
|
||||||
|
|
||||||
|
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||||
/* Autocorrelation function in fixed point. NOTE! Different from SPLIB-version in how it scales the signal. */
|
// Autocorrelation function in fixed point.
|
||||||
|
// NOTE! Different from SPLIB-version in how it scales the signal.
|
||||||
int WebRtcIsacfix_AutocorrFix(
|
int WebRtcIsacfix_AutocorrFix(
|
||||||
WebRtc_Word32 *r,
|
WebRtc_Word32* __restrict r,
|
||||||
const WebRtc_Word16 *x,
|
const WebRtc_Word16* __restrict x,
|
||||||
WebRtc_Word16 N,
|
WebRtc_Word16 N,
|
||||||
WebRtc_Word16 order,
|
WebRtc_Word16 order,
|
||||||
WebRtc_Word16 *scale)
|
WebRtc_Word16* __restrict scale) {
|
||||||
{
|
|
||||||
int j, i;
|
|
||||||
WebRtc_Word16 scaling;
|
|
||||||
WebRtc_Word32 sum, prod, newsum;
|
|
||||||
G_CONST WebRtc_Word16 *xptr1;
|
|
||||||
G_CONST WebRtc_Word16 *xptr2;
|
|
||||||
|
|
||||||
sum=0;
|
int i = 0;
|
||||||
scaling=0;
|
int j = 0;
|
||||||
/* Calculate r[0] and how much scaling is needed */
|
int16_t scaling = 0;
|
||||||
for (i=0; i < N; i++) {
|
int32_t sum = 0;
|
||||||
prod = WEBRTC_SPL_MUL_16_16_RSFT(x[i],x[i],scaling);
|
uint32_t temp = 0;
|
||||||
newsum = sum+prod;
|
int64_t prod = 0;
|
||||||
/* If sum gets less than 0 we have overflow and need to scale the signal */
|
|
||||||
if(newsum<0) {
|
// Calculate r[0].
|
||||||
scaling++;
|
for (i = 0; i < N; i++) {
|
||||||
sum=WEBRTC_SPL_RSHIFT_W32(sum, 1);
|
prod += WEBRTC_SPL_MUL_16_16(x[i], x[i]);
|
||||||
prod=WEBRTC_SPL_RSHIFT_W32(prod, 1);
|
|
||||||
}
|
|
||||||
sum += prod;
|
|
||||||
}
|
}
|
||||||
r[0]=sum;
|
|
||||||
|
|
||||||
/* Perform the actual correlation calculation */
|
// Calculate scaling (the value of shifting).
|
||||||
for (i = 1; i < order + 1; i++)
|
temp = (uint32_t)(prod >> 31);
|
||||||
{
|
if(temp == 0) {
|
||||||
int loops=(N-i);
|
scaling = 0;
|
||||||
sum = 0;
|
} else {
|
||||||
xptr1=(G_CONST WebRtc_Word16 *)x;
|
scaling = 32 - WebRtcSpl_NormU32(temp);
|
||||||
xptr2=(G_CONST WebRtc_Word16 *)&x[i];
|
}
|
||||||
|
r[0] = (int32_t)(prod >> scaling);
|
||||||
|
|
||||||
for (j = loops;j > 0; j--)
|
// Perform the actual correlation calculation.
|
||||||
{
|
for (i = 1; i < order + 1; i++) {
|
||||||
sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1++,*xptr2++,scaling);
|
prod = 0;
|
||||||
|
for (j = 0; j < N - i; j++) {
|
||||||
|
prod += WEBRTC_SPL_MUL_16_16(x[j], x[i + j]);
|
||||||
}
|
}
|
||||||
|
sum = (int32_t)(prod >> scaling);
|
||||||
r[i] = sum;
|
r[i] = sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,6 +67,7 @@ int WebRtcIsacfix_AutocorrFix(
|
|||||||
|
|
||||||
return(order + 1);
|
return(order + 1);
|
||||||
}
|
}
|
||||||
|
#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||||
|
|
||||||
static const WebRtc_Word32 kApUpperQ15[ALLPASSSECTIONS] = { 1137, 12537 };
|
static const WebRtc_Word32 kApUpperQ15[ALLPASSSECTIONS] = { 1137, 12537 };
|
||||||
static const WebRtc_Word32 kApLowerQ15[ALLPASSSECTIONS] = { 5059, 24379 };
|
static const WebRtc_Word32 kApLowerQ15[ALLPASSSECTIONS] = { 5059, 24379 };
|
||||||
|
167
src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c
Normal file
167
src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* filters_neon.c
|
||||||
|
*
|
||||||
|
* This file contains function WebRtcIsacfix_AutocorrFix, optimized for
|
||||||
|
* ARM Neon platform.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "codec.h"
|
||||||
|
|
||||||
|
// Autocorrelation function in fixed point.
|
||||||
|
// NOTE! Different from SPLIB-version in how it scales the signal.
|
||||||
|
int WebRtcIsacfix_AutocorrFix(
|
||||||
|
WebRtc_Word32* __restrict r,
|
||||||
|
const WebRtc_Word16* __restrict x,
|
||||||
|
WebRtc_Word16 N,
|
||||||
|
WebRtc_Word16 order,
|
||||||
|
WebRtc_Word16* __restrict scale) {
|
||||||
|
|
||||||
|
// The 1st for loop assumed N % 4 == 0.
|
||||||
|
assert(N % 4 == 0);
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
int zeros_low = 0;
|
||||||
|
int zeros_high = 0;
|
||||||
|
int16_t scaling = 0;
|
||||||
|
int32_t sum = 0;
|
||||||
|
|
||||||
|
// Step 1, calculate r[0] and how much scaling is needed.
|
||||||
|
|
||||||
|
int16x4_t reg16x4;
|
||||||
|
int64x1_t reg64x1a;
|
||||||
|
int64x1_t reg64x1b;
|
||||||
|
int32x4_t reg32x4;
|
||||||
|
int64x2_t reg64x2 = vdupq_n_s64(0); // zeros
|
||||||
|
|
||||||
|
// Loop over the samples and do:
|
||||||
|
// sum += WEBRTC_SPL_MUL_16_16(x[i], x[i]);
|
||||||
|
for (i = 0; i < N; i += 4) {
|
||||||
|
reg16x4 = vld1_s16(&x[i]);
|
||||||
|
reg32x4 = vmull_s16(reg16x4, reg16x4);
|
||||||
|
reg64x2 = vpadalq_s32(reg64x2, reg32x4);
|
||||||
|
}
|
||||||
|
reg64x1a = vget_low_s64(reg64x2);
|
||||||
|
reg64x1b = vget_high_s64(reg64x2);
|
||||||
|
reg64x1a = vadd_s64(reg64x1a, reg64x1b);
|
||||||
|
|
||||||
|
// Calculate the value of shifting (scaling).
|
||||||
|
__asm__ __volatile__(
|
||||||
|
"vmov %[z_l], %[z_h], %P[reg]\n\t"
|
||||||
|
"clz %[z_l], %[z_l]\n\t"
|
||||||
|
"clz %[z_h], %[z_h]\n\t"
|
||||||
|
:[z_l]"+r"(zeros_low),
|
||||||
|
[z_h]"+r"(zeros_high)
|
||||||
|
:[reg]"w"(reg64x1a)
|
||||||
|
);
|
||||||
|
if (zeros_high != 32) {
|
||||||
|
scaling = (32 - zeros_high + 1);
|
||||||
|
} else if (zeros_low == 0) {
|
||||||
|
scaling = 1;
|
||||||
|
}
|
||||||
|
reg64x1b = -scaling;
|
||||||
|
reg64x1a = vshl_s64(reg64x1a, reg64x1b);
|
||||||
|
|
||||||
|
// Record the result.
|
||||||
|
r[0] = (int32_t)vget_lane_s64(reg64x1a, 0);
|
||||||
|
|
||||||
|
|
||||||
|
// Step 2, perform the actual correlation calculation.
|
||||||
|
|
||||||
|
/* Original C code (for the rest of the function):
|
||||||
|
for (i = 1; i < order + 1; i++) {
|
||||||
|
prod = 0;
|
||||||
|
for (j = 0; j < N - i; j++) {
|
||||||
|
prod += WEBRTC_SPL_MUL_16_16(x[j], x[i + j]);
|
||||||
|
}
|
||||||
|
sum = (int32_t)(prod >> scaling);
|
||||||
|
r[i] = sum;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (i = 1; i < order + 1; i++) {
|
||||||
|
int32_t prod_lower = 0;
|
||||||
|
int32_t prod_upper = 0;
|
||||||
|
int16_t* ptr0 = &x[0];
|
||||||
|
int16_t* ptr1 = &x[i];
|
||||||
|
int32_t tmp = 0;
|
||||||
|
|
||||||
|
// Initialize the sum (q9) to zero.
|
||||||
|
__asm__ __volatile__("vmov.i32 q9, #0\n\t":::"q9");
|
||||||
|
|
||||||
|
// Calculate the major block of the samples (a multiple of 8).
|
||||||
|
for (; ptr0 < &x[N - i - 7];) {
|
||||||
|
__asm__ __volatile__(
|
||||||
|
"vld1.16 {d20, d21}, [%[ptr0]]!\n\t"
|
||||||
|
"vld1.16 {d22, d23}, [%[ptr1]]!\n\t"
|
||||||
|
"vmull.s16 q12, d20, d22\n\t"
|
||||||
|
"vmull.s16 q13, d21, d23\n\t"
|
||||||
|
"vpadal.s32 q9, q12\n\t"
|
||||||
|
"vpadal.s32 q9, q13\n\t"
|
||||||
|
|
||||||
|
// Specify constraints.
|
||||||
|
:[ptr0]"+r"(ptr0),
|
||||||
|
[ptr1]"+r"(ptr1)
|
||||||
|
:
|
||||||
|
:"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the rest of the samples.
|
||||||
|
for (; ptr0 < &x[N - i]; ptr0++, ptr1++) {
|
||||||
|
__asm__ __volatile__(
|
||||||
|
"smulbb %[tmp], %[ptr0], %[ptr1]\n\t"
|
||||||
|
"adds %[prod_lower], %[prod_lower], %[tmp]\n\t"
|
||||||
|
"adc %[prod_upper], %[prod_upper], %[tmp], asr #31\n\t"
|
||||||
|
|
||||||
|
// Specify constraints.
|
||||||
|
:[prod_lower]"+r"(prod_lower),
|
||||||
|
[prod_upper]"+r"(prod_upper),
|
||||||
|
[tmp]"+r"(tmp)
|
||||||
|
:[ptr0]"r"(*ptr0),
|
||||||
|
[ptr1]"r"(*ptr1)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum the results up, and do shift.
|
||||||
|
__asm__ __volatile__(
|
||||||
|
"vadd.i64 d18, d19\n\t"
|
||||||
|
"vmov.32 d17[0], %[prod_lower]\n\t"
|
||||||
|
"vmov.32 d17[1], %[prod_upper]\n\t"
|
||||||
|
"vadd.i64 d17, d18\n\t"
|
||||||
|
"mov %[tmp], %[scaling], asr #31\n\t"
|
||||||
|
"vmov.32 d16, %[scaling], %[tmp]\n\t"
|
||||||
|
"vshl.s64 d17, d16\n\t"
|
||||||
|
"vmov.32 %[sum], d17[0]\n\t"
|
||||||
|
|
||||||
|
// Specify constraints.
|
||||||
|
:[sum]"=r"(sum),
|
||||||
|
[tmp]"+r"(tmp)
|
||||||
|
:[prod_upper]"r"(prod_upper),
|
||||||
|
[prod_lower]"r"(prod_lower),
|
||||||
|
[scaling]"r"(-scaling)
|
||||||
|
:"d16", "d17", "d18", "d19"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Record the result.
|
||||||
|
r[i] = sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record the result.
|
||||||
|
*scale = scaling;
|
||||||
|
|
||||||
|
return(order + 1);
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user