Optimized function WebRtcSpl_ScaleAndAddVectorsWithRound() for ARM-NEON platforms, and refactored it for generic C.
We moved it out of ilbc_specific_functions.c, since it is not used only by iLBC. Passed the unit test. Review URL: https://webrtc-codereview.appspot.com/426009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1904 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
7e26ad3828
commit
bb966ca835
@ -57,7 +57,8 @@ ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||
LOCAL_SRC_FILES += \
|
||||
cross_correlation_neon.s \
|
||||
downsample_fast_neon.s \
|
||||
min_max_operations_neon.s
|
||||
min_max_operations_neon.s \
|
||||
vector_scaling_operations_neon.s
|
||||
LOCAL_CFLAGS += \
|
||||
$(MY_ARM_CFLAGS_NEON)
|
||||
else
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -11,35 +11,16 @@
|
||||
|
||||
/*
|
||||
* This file contains implementations of the iLBC specific functions
|
||||
* WebRtcSpl_ScaleAndAddVectorsWithRound()
|
||||
* WebRtcSpl_ReverseOrderMultArrayElements()
|
||||
* WebRtcSpl_ElementwiseVectorMult()
|
||||
* WebRtcSpl_AddVectorsAndShift()
|
||||
* WebRtcSpl_AddAffineVectorToVector()
|
||||
* WebRtcSpl_AffineTransformVector()
|
||||
*
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "signal_processing_library.h"
|
||||
|
||||
/*
 * Scales two vectors, adds them together with rounding and shifts the sum
 * right:
 *
 *   out[i] = (vector1[i] * scale1 + vector2[i] * scale2 + round_value)
 *            >> right_shifts
 *
 * where round_value = (1 << right_shifts) >> 1.
 *
 * Input:
 *   - vector1, vector2 : Input vectors, vector_length elements each
 *   - scale1, scale2   : Gains applied to vector1 and vector2
 *   - right_shifts     : Number of right bit shifts applied to the sum
 *   - vector_length    : Number of elements to process
 *
 * Output:
 *   - out              : Output vector, vector_length elements
 */
void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16 *vector1, WebRtc_Word16 scale1,
                                           WebRtc_Word16 *vector2, WebRtc_Word16 scale2,
                                           WebRtc_Word16 right_shifts, WebRtc_Word16 *out,
                                           WebRtc_Word16 vector_length)
{
    int i;
    /*
     * Kept in an int rather than a 16-bit variable: 1 << right_shifts does not
     * fit in 16 signed bits once right_shifts reaches 15, which would corrupt
     * the rounding term.
     */
    int round_value = (1 << right_shifts) >> 1;

    for (i = 0; i < vector_length; i++)
    {
        out[i] = (WebRtc_Word16)((WEBRTC_SPL_MUL_16_16(vector1[i], scale1)
                + WEBRTC_SPL_MUL_16_16(vector2[i], scale2) + round_value) >> right_shifts);
    }
}
|
||||
|
||||
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16 *out, G_CONST WebRtc_Word16 *in,
|
||||
G_CONST WebRtc_Word16 *win,
|
||||
WebRtc_Word16 vector_length,
|
||||
|
@ -268,17 +268,37 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
|
||||
WebRtc_Word16 gain2, int right_shifts2,
|
||||
WebRtc_Word16* out_vector,
|
||||
int vector_length);
|
||||
|
||||
// Performs the vector operation:
|
||||
// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
|
||||
// + round_value) >> right_shifts,
|
||||
// where round_value = (1 << right_shifts) >> 1.
|
||||
//
|
||||
// Input:
|
||||
// - in_vector1 : Input vector 1
|
||||
// - in_vector1_scale : Gain to be used for vector 1
|
||||
// - in_vector2 : Input vector 2
|
||||
// - in_vector2_scale : Gain to be used for vector 2
|
||||
// - right_shifts : Number of right bit shifts to be applied
|
||||
// - length : Number of elements in the input vectors
|
||||
//
|
||||
// Output:
|
||||
// - out_vector : Output vector
|
||||
// Return value : 0 if OK, -1 if (in_vector1 == NULL
|
||||
// || in_vector2 == NULL || out_vector == NULL
|
||||
// || length <= 0 || right_shifts < 0).
|
||||
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
|
||||
int16_t in_vector1_scale,
|
||||
const int16_t* in_vector2,
|
||||
int16_t in_vector2_scale,
|
||||
int right_shifts,
|
||||
int16_t* out_vector,
|
||||
int length);
|
||||
|
||||
// End: Vector scaling operations.
|
||||
|
||||
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
|
||||
// Description at bottom of file.
|
||||
void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16* in_vector1,
|
||||
WebRtc_Word16 scale1,
|
||||
WebRtc_Word16* in_vector2,
|
||||
WebRtc_Word16 scale2,
|
||||
WebRtc_Word16 right_shifts,
|
||||
WebRtc_Word16* out_vector,
|
||||
WebRtc_Word16 vector_length);
|
||||
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16* out_vector,
|
||||
G_CONST WebRtc_Word16* in_vector,
|
||||
G_CONST WebRtc_Word16* window,
|
||||
@ -991,30 +1011,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
|
||||
// - out_vector : Output vector
|
||||
//
|
||||
|
||||
//
|
||||
// WebRtcSpl_ScaleAndAddVectorsWithRound(...)
|
||||
//
|
||||
// Performs the vector operation:
|
||||
//
|
||||
// out_vector[k] = ((scale1*in_vector1[k]) + (scale2*in_vector2[k])
|
||||
// + round_value) >> right_shifts
|
||||
//
|
||||
// where:
|
||||
//
|
||||
// round_value = (1<<right_shifts)>>1
|
||||
//
|
||||
// Input:
|
||||
// - in_vector1 : Input vector 1
|
||||
// - scale1 : Gain to be used for vector 1
|
||||
// - in_vector2 : Input vector 2
|
||||
// - scale2 : Gain to be used for vector 2
|
||||
// - right_shifts : Number of right bit shifts to be applied
|
||||
// - vector_length : Number of elements in the input vectors
|
||||
//
|
||||
// Output:
|
||||
// - out_vector : Output vector
|
||||
//
|
||||
|
||||
//
|
||||
// WebRtcSpl_ReverseOrderMultArrayElements(...)
|
||||
//
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@ -17,9 +17,7 @@
|
||||
* WebRtcSpl_ScaleVector()
|
||||
* WebRtcSpl_ScaleVectorWithSat()
|
||||
* WebRtcSpl_ScaleAndAddVectors()
|
||||
*
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
* WebRtcSpl_ScaleAndAddVectorsWithRound()
|
||||
*/
|
||||
|
||||
#include "signal_processing_library.h"
|
||||
@ -149,3 +147,30 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16 *in1, WebRtc_Word16 gain
|
||||
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(gain2, *in2ptr++, shift2);
|
||||
}
|
||||
}
|
||||
|
||||
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||
/*
 * Performs the vector operation
 *
 *   out_vector[k] = ((in_vector1_scale * in_vector1[k])
 *                    + (in_vector2_scale * in_vector2[k])
 *                    + round_value) >> right_shifts,
 *
 * where round_value = (1 << right_shifts) >> 1.
 *
 * Returns 0 on success. Returns -1, without touching out_vector, when any of
 * the three pointers is NULL, when length <= 0, or when right_shifts < 0.
 *
 * NOTE(review): no upper bound is enforced on right_shifts; callers are
 * expected to keep it well below the bit width of int.
 */
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
                                          int16_t in_vector1_scale,
                                          const int16_t* in_vector2,
                                          int16_t in_vector2_scale,
                                          int right_shifts,
                                          int16_t* out_vector,
                                          int length) {
  int i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length <= 0 || right_shifts < 0) {
    return -1;
  }

  /*
   * Derived only after the arguments are validated: shifting by a negative
   * count is undefined behavior, so it must not happen on the error path.
   */
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        WEBRTC_SPL_MUL_16_16(in_vector1[i], in_vector1_scale)
        + WEBRTC_SPL_MUL_16_16(in_vector2[i], in_vector2_scale)
        + round_value) >> right_shifts);
  }

  return 0;
}
|
||||
#endif
|
||||
|
@ -0,0 +1,88 @@
|
||||
@
|
||||
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
@
|
||||
|
||||
@ vector_scaling_operations_neon.s
|
||||
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
|
||||
@ optimized for ARM Neon platform. Output is bit-exact with the reference
|
||||
@ C code in vector_scaling_operations.c.
|
||||
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.align 2
|
||||
.global WebRtcSpl_ScaleAndAddVectorsWithRound
|
||||
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound:
.fnstart

@ Computes, for k in [0, length):
@   out_vector[k] = (in_vector1[k] * in_vector1_scale
@                    + in_vector2[k] * in_vector2_scale
@                    + round_value) >> right_shifts
@ with round_value = (1 << right_shifts) >> 1.
@
@ Register usage:
@   r0: in_vector1 read pointer        r1: in_vector1_scale
@   r2: in_vector2 read pointer        r3: in_vector2_scale
@   r4: length / remaining elements    r5: out_vector write pointer
@   r6: right_shifts                   r7, r8: scratch
@   r9: round_value for the scalar loop
@
@ Returns 0 in r0 on success, or -1 if a vector pointer is NULL,
@ length <= 0 or right_shifts < 0 (same contract as the C version).

  push {r4-r9}

  @ Six registers were pushed (24 bytes), so the stack arguments start at
  @ [sp, #24].
  ldr r4, [sp, #32]           @ length
  ldr r5, [sp, #28]           @ out_vector
  ldr r6, [sp, #24]           @ right_shifts is an int: load the full word.

  cmp r0, #0
  beq ERROR                   @ in_vector1 == NULL
  cmp r2, #0
  beq ERROR                   @ in_vector2 == NULL
  cmp r5, #0
  beq ERROR                   @ out_vector == NULL
  cmp r4, #0
  ble ERROR                   @ length <= 0
  cmp r6, #0
  blt ERROR                   @ right_shifts < 0

  cmp r4, #8
  blt SET_ROUND_VALUE         @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1             @ in_vector1_scale
  vdup.16 d27, r3             @ in_vector2_scale

  @ NEON instructions can only shift right by an immediate value. To shift
  @ right by a register value, we shift left by the negated value instead.
  rsb r7, r6, #0
  vdup.16 q12, r7             @ -right_shifts

  bic r7, r4, #7              @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!   @ in_vector1[]
  vld1.16 {d30, d31}, [r2]!   @ in_vector2[]
  vmull.s16 q0, d28, d26
  vmull.s16 q1, d29, d26
  vmull.s16 q2, d30, d27
  vmull.s16 q3, d31, d27
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12           @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0            @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!
  bgt LOOP_UNROLLED_BY_8

  @ Only the low three bits of length are left to process. A wider mask
  @ would re-run elements already handled above and overrun the vectors.
  ands r4, #7                 @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                  @ round_value = (1 << right_shifts) >> 1

LOOP_NO_UNROLLING:
  ldrh r7, [r0], #2
  ldrh r8, [r2], #2
  smulbb r7, r7, r1           @ Signed 16x16 multiply of the bottom halfwords.
  smulbb r8, r8, r3
  subs r4, #1
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2
  bne LOOP_NO_UNROLLING

END:
  mov r0, #0                  @ Success.
  pop {r4-r9}
  bx lr

ERROR:
  mvn r0, #0                  @ Return -1 for invalid arguments.
  pop {r4-r9}
  bx lr

.fnend
|
Loading…
x
Reference in New Issue
Block a user