Optimized the function WebRtcSpl_ScaleAndAddVectorsWithRound() for ARM NEON platforms, and refactored it for generic C.

We moved it out of ilbc_specific_functions.c, since it is used outside of iLBC as well.

Passed the unit test.
Review URL: https://webrtc-codereview.appspot.com/426009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1904 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org
2012-03-16 16:29:37 +00:00
parent 7e26ad3828
commit bb966ca835
5 changed files with 147 additions and 56 deletions

View File

@@ -57,7 +57,8 @@ ifeq ($(ARCH_ARM_HAVE_NEON),true)
LOCAL_SRC_FILES += \
cross_correlation_neon.s \
downsample_fast_neon.s \
min_max_operations_neon.s
min_max_operations_neon.s \
vector_scaling_operations_neon.s
LOCAL_CFLAGS += \
$(MY_ARM_CFLAGS_NEON)
else

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -11,35 +11,16 @@
/*
* This file contains implementations of the iLBC specific functions
* WebRtcSpl_ScaleAndAddVectorsWithRound()
* WebRtcSpl_ReverseOrderMultArrayElements()
* WebRtcSpl_ElementwiseVectorMult()
* WebRtcSpl_AddVectorsAndShift()
* WebRtcSpl_AddAffineVectorToVector()
* WebRtcSpl_AffineTransformVector()
*
* The description header can be found in signal_processing_library.h
*
*/
#include "signal_processing_library.h"
/*
 * Performs the vector operation
 *
 *   out[k] = ((scale1 * vector1[k]) + (scale2 * vector2[k]) + roundVal)
 *            >> right_shifts
 *
 * where roundVal = (1 << right_shifts) >> 1.
 *
 * Input:
 *   - vector1       : Input vector 1
 *   - scale1        : Gain to be used for vector 1
 *   - vector2       : Input vector 2
 *   - scale2        : Gain to be used for vector 2
 *   - right_shifts  : Number of right bit shifts to be applied
 *   - vector_length : Number of elements in the input vectors
 *
 * Output:
 *   - out           : Output vector
 */
void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16 *vector1, WebRtc_Word16 scale1,
                                           WebRtc_Word16 *vector2, WebRtc_Word16 scale2,
                                           WebRtc_Word16 right_shifts, WebRtc_Word16 *out,
                                           WebRtc_Word16 vector_length)
{
    int i;
    /*
     * The rounding term is kept in an int. Storing (1 << right_shifts) in a
     * 16-bit variable truncates it for right_shifts >= 15 (15 yields -32768,
     * 16 yields 0), which corrupts the rounding of every output sample.
     */
    int roundVal;

    roundVal = (1 << right_shifts) >> 1;

    for (i = 0; i < vector_length; i++)
    {
        out[i] = (WebRtc_Word16)((WEBRTC_SPL_MUL_16_16(vector1[i], scale1)
                + WEBRTC_SPL_MUL_16_16(vector2[i], scale2) + roundVal) >> right_shifts);
    }
}
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16 *out, G_CONST WebRtc_Word16 *in,
G_CONST WebRtc_Word16 *win,
WebRtc_Word16 vector_length,

View File

@@ -268,17 +268,37 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
WebRtc_Word16 gain2, int right_shifts2,
WebRtc_Word16* out_vector,
int vector_length);
// Performs the vector operation:
// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
// + round_value) >> right_shifts,
// where round_value = (1 << right_shifts) >> 1.
//
// Input:
// - in_vector1 : Input vector 1
// - in_vector1_scale : Gain to be used for vector 1
// - in_vector2 : Input vector 2
// - in_vector2_scale : Gain to be used for vector 2
// - right_shifts : Number of right bit shifts to be applied
// - length : Number of elements in the input vectors
//
// Output:
// - out_vector : Output vector
// Return value : 0 if OK, -1 if (in_vector1 == NULL
// || in_vector2 == NULL || out_vector == NULL
// || length <= 0 || right_shifts < 0).
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length);
// End: Vector scaling operations.
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
// Description at bottom of file.
void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16* in_vector1,
WebRtc_Word16 scale1,
WebRtc_Word16* in_vector2,
WebRtc_Word16 scale2,
WebRtc_Word16 right_shifts,
WebRtc_Word16* out_vector,
WebRtc_Word16 vector_length);
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16* out_vector,
G_CONST WebRtc_Word16* in_vector,
G_CONST WebRtc_Word16* window,
@@ -991,30 +1011,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// - out_vector : Output vector
//
//
// WebRtcSpl_ScaleAndAddVectorsWithRound(...)
//
// Performs the vector operation:
//
// out_vector[k] = ((scale1*in_vector1[k]) + (scale2*in_vector2[k])
// + round_value) >> right_shifts
//
// where:
//
// round_value = (1<<right_shifts)>>1
//
// Input:
// - in_vector1 : Input vector 1
// - scale1 : Gain to be used for vector 1
// - in_vector2 : Input vector 2
// - scale2 : Gain to be used for vector 2
// - right_shifts : Number of right bit shifts to be applied
// - vector_length : Number of elements in the input vectors
//
// Output:
// - out_vector : Output vector
//
//
// WebRtcSpl_ReverseOrderMultArrayElements(...)
//

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -17,9 +17,7 @@
* WebRtcSpl_ScaleVector()
* WebRtcSpl_ScaleVectorWithSat()
* WebRtcSpl_ScaleAndAddVectors()
*
* The description header can be found in signal_processing_library.h
*
* WebRtcSpl_ScaleAndAddVectorsWithRound()
*/
#include "signal_processing_library.h"
@@ -149,3 +147,30 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16 *in1, WebRtc_Word16 gain
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(gain2, *in2ptr++, shift2);
}
}
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
// Performs the vector operation:
//   out_vector[k] = ((in_vector1_scale * in_vector1[k])
//                    + (in_vector2_scale * in_vector2[k]) + round_value)
//                   >> right_shifts,
// where round_value = (1 << right_shifts) >> 1.
//
// Returns 0 on success, or -1 if any pointer is NULL, length <= 0 or
// right_shifts < 0. Nothing is written to out_vector on failure.
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
                                          int16_t in_vector1_scale,
                                          const int16_t* in_vector2,
                                          int16_t in_vector2_scale,
                                          int right_shifts,
                                          int16_t* out_vector,
                                          int length) {
  int i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length <= 0 || right_shifts < 0) {
    return -1;
  }

  // Computed only after validation: shifting by a negative count is
  // undefined behaviour, so this must not run before right_shifts is checked.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        WEBRTC_SPL_MUL_16_16(in_vector1[i], in_vector1_scale)
        + WEBRTC_SPL_MUL_16_16(in_vector2[i], in_vector2_scale)
        + round_value) >> right_shifts);
  }

  return 0;
}
#endif

View File

@@ -0,0 +1,88 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.
.arch armv7-a
.fpu neon
.align 2
.global WebRtcSpl_ScaleAndAddVectorsWithRound
@ int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
@                                           int16_t in_vector1_scale,
@                                           const int16_t* in_vector2,
@                                           int16_t in_vector2_scale,
@                                           int right_shifts,
@                                           int16_t* out_vector,
@                                           int length);
@
@ Computes, for k in [0, length):
@   out_vector[k] = (in_vector1[k] * in_vector1_scale
@                    + in_vector2[k] * in_vector2_scale
@                    + ((1 << right_shifts) >> 1)) >> right_shifts
@
@ In:   r0 = in_vector1, r1 = in_vector1_scale,
@       r2 = in_vector2, r3 = in_vector2_scale,
@       stack at entry: [sp, #0] = right_shifts, [sp, #4] = out_vector,
@                       [sp, #8] = length.
@ Out:  r0 = 0 on success, -1 if a pointer is NULL, length <= 0 or
@       right_shifts < 0 (same contract as the C implementation).
@ Clobbers: q0-q3, q12-q15, flags. r4-r9 are saved and restored.
WebRtcSpl_ScaleAndAddVectorsWithRound:
.fnstart

  push {r4-r9}                @ 24 bytes pushed: stack args now start at sp+24.

  ldr r4, [sp, #32]           @ length
  ldr r5, [sp, #28]           @ out_vector
  ldr r6, [sp, #24]           @ right_shifts (a full int in the prototype)

  @ Argument validation, mirroring the reference C code.
  cmp r0, #0
  beq RETURN_ERROR            @ in_vector1 == NULL
  cmp r2, #0
  beq RETURN_ERROR            @ in_vector2 == NULL
  cmp r5, #0
  beq RETURN_ERROR            @ out_vector == NULL
  cmp r4, #0
  ble RETURN_ERROR            @ length <= 0
  cmp r6, #0
  blt RETURN_ERROR            @ right_shifts < 0

  cmp r4, #8
  blt SET_ROUND_VALUE         @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1             @ in_vector1_scale
  vdup.16 d27, r3             @ in_vector2_scale

  @ Neon instructions can only shift right by an immediate value. To shift
  @ right by a register value, we have to shift left by the negative value.
  rsb r7, r6, #0
  vdup.32 q12, r7             @ -right_shifts, one copy per 32-bit lane

  bic r7, r4, #7              @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!   @ in_vector1[]
  vld1.16 {d30, d31}, [r2]!   @ in_vector2[]
  vmull.s16 q0, d28, d26
  vmull.s16 q1, d29, d26
  vmull.s16 q2, d30, d27
  vmull.s16 q3, d31, d27
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12           @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0            @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!
  bgt LOOP_UNROLLED_BY_8

  @ Only the low three bits of length are left to process. Masking with
  @ anything wider would make the scalar loop run past the end of the vectors.
  ands r4, #7                 @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq RETURN_OK

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                  @ round_value = (1 << right_shifts) >> 1

LOOP_NO_UNROLLING:
  ldrh r7, [r0], #2
  ldrh r8, [r2], #2
  smulbb r7, r7, r1           @ Signed 16x16 multiply of the bottom halfwords.
  smulbb r8, r8, r3
  subs r4, #1                 @ Flags consumed by the bne below.
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2
  bne LOOP_NO_UNROLLING

RETURN_OK:
  mov r0, #0                  @ Return value: success.

END:
  pop {r4-r9}
  bx lr

RETURN_ERROR:
  mvn r0, #0                  @ Return value: -1.
  b END

.fnend