Optimized function WebRtcSpl_ScaleAndAddVectorsWithRound() for ARM-NEON platforms, and refactored it for generic C.
We moved it out of ilbc_specific_functions.c, since it is used outside iLBC as well. Passed the unit test. Review URL: https://webrtc-codereview.appspot.com/426009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1904 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
7e26ad3828
commit
bb966ca835
@ -57,7 +57,8 @@ ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
|||||||
LOCAL_SRC_FILES += \
|
LOCAL_SRC_FILES += \
|
||||||
cross_correlation_neon.s \
|
cross_correlation_neon.s \
|
||||||
downsample_fast_neon.s \
|
downsample_fast_neon.s \
|
||||||
min_max_operations_neon.s
|
min_max_operations_neon.s \
|
||||||
|
vector_scaling_operations_neon.s
|
||||||
LOCAL_CFLAGS += \
|
LOCAL_CFLAGS += \
|
||||||
$(MY_ARM_CFLAGS_NEON)
|
$(MY_ARM_CFLAGS_NEON)
|
||||||
else
|
else
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Use of this source code is governed by a BSD-style license
|
* Use of this source code is governed by a BSD-style license
|
||||||
* that can be found in the LICENSE file in the root of the source
|
* that can be found in the LICENSE file in the root of the source
|
||||||
@ -11,35 +11,16 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* This file contains implementations of the iLBC specific functions
|
* This file contains implementations of the iLBC specific functions
|
||||||
* WebRtcSpl_ScaleAndAddVectorsWithRound()
|
|
||||||
* WebRtcSpl_ReverseOrderMultArrayElements()
|
* WebRtcSpl_ReverseOrderMultArrayElements()
|
||||||
* WebRtcSpl_ElementwiseVectorMult()
|
* WebRtcSpl_ElementwiseVectorMult()
|
||||||
* WebRtcSpl_AddVectorsAndShift()
|
* WebRtcSpl_AddVectorsAndShift()
|
||||||
* WebRtcSpl_AddAffineVectorToVector()
|
* WebRtcSpl_AddAffineVectorToVector()
|
||||||
* WebRtcSpl_AffineTransformVector()
|
* WebRtcSpl_AffineTransformVector()
|
||||||
*
|
*
|
||||||
* The description header can be found in signal_processing_library.h
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "signal_processing_library.h"
|
#include "signal_processing_library.h"
|
||||||
|
|
||||||
void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16 *vector1, WebRtc_Word16 scale1,
|
|
||||||
WebRtc_Word16 *vector2, WebRtc_Word16 scale2,
|
|
||||||
WebRtc_Word16 right_shifts, WebRtc_Word16 *out,
|
|
||||||
WebRtc_Word16 vector_length)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
WebRtc_Word16 roundVal;
|
|
||||||
roundVal = 1 << right_shifts;
|
|
||||||
roundVal = roundVal >> 1;
|
|
||||||
for (i = 0; i < vector_length; i++)
|
|
||||||
{
|
|
||||||
out[i] = (WebRtc_Word16)((WEBRTC_SPL_MUL_16_16(vector1[i], scale1)
|
|
||||||
+ WEBRTC_SPL_MUL_16_16(vector2[i], scale2) + roundVal) >> right_shifts);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16 *out, G_CONST WebRtc_Word16 *in,
|
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16 *out, G_CONST WebRtc_Word16 *in,
|
||||||
G_CONST WebRtc_Word16 *win,
|
G_CONST WebRtc_Word16 *win,
|
||||||
WebRtc_Word16 vector_length,
|
WebRtc_Word16 vector_length,
|
||||||
|
@ -268,17 +268,37 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
|
|||||||
WebRtc_Word16 gain2, int right_shifts2,
|
WebRtc_Word16 gain2, int right_shifts2,
|
||||||
WebRtc_Word16* out_vector,
|
WebRtc_Word16* out_vector,
|
||||||
int vector_length);
|
int vector_length);
|
||||||
|
|
||||||
|
// Performs the vector operation:
|
||||||
|
// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
|
||||||
|
// + round_value) >> right_shifts,
|
||||||
|
// where round_value = (1 << right_shifts) >> 1.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - in_vector1 : Input vector 1
|
||||||
|
// - in_vector1_scale : Gain to be used for vector 1
|
||||||
|
// - in_vector2 : Input vector 2
|
||||||
|
// - in_vector2_scale : Gain to be used for vector 2
|
||||||
|
// - right_shifts : Number of right bit shifts to be applied
|
||||||
|
// - length : Number of elements in the input vectors
|
||||||
|
//
|
||||||
|
// Output:
|
||||||
|
// - out_vector : Output vector
|
||||||
|
// Return value : 0 if OK, -1 if (in_vector1 == NULL
|
||||||
|
// || in_vector2 == NULL || out_vector == NULL
|
||||||
|
// || length <= 0 || right_shifts < 0).
|
||||||
|
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
|
||||||
|
int16_t in_vector1_scale,
|
||||||
|
const int16_t* in_vector2,
|
||||||
|
int16_t in_vector2_scale,
|
||||||
|
int right_shifts,
|
||||||
|
int16_t* out_vector,
|
||||||
|
int length);
|
||||||
|
|
||||||
// End: Vector scaling operations.
|
// End: Vector scaling operations.
|
||||||
|
|
||||||
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
|
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
|
||||||
// Description at bottom of file.
|
// Description at bottom of file.
|
||||||
void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16* in_vector1,
|
|
||||||
WebRtc_Word16 scale1,
|
|
||||||
WebRtc_Word16* in_vector2,
|
|
||||||
WebRtc_Word16 scale2,
|
|
||||||
WebRtc_Word16 right_shifts,
|
|
||||||
WebRtc_Word16* out_vector,
|
|
||||||
WebRtc_Word16 vector_length);
|
|
||||||
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16* out_vector,
|
void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16* out_vector,
|
||||||
G_CONST WebRtc_Word16* in_vector,
|
G_CONST WebRtc_Word16* in_vector,
|
||||||
G_CONST WebRtc_Word16* window,
|
G_CONST WebRtc_Word16* window,
|
||||||
@ -991,30 +1011,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
|
|||||||
// - out_vector : Output vector
|
// - out_vector : Output vector
|
||||||
//
|
//
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_ScaleAndAddVectorsWithRound(...)
|
|
||||||
//
|
|
||||||
// Performs the vector operation:
|
|
||||||
//
|
|
||||||
// out_vector[k] = ((scale1*in_vector1[k]) + (scale2*in_vector2[k])
|
|
||||||
// + round_value) >> right_shifts
|
|
||||||
//
|
|
||||||
// where:
|
|
||||||
//
|
|
||||||
// round_value = (1<<right_shifts)>>1
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - in_vector1 : Input vector 1
|
|
||||||
// - scale1 : Gain to be used for vector 1
|
|
||||||
// - in_vector2 : Input vector 2
|
|
||||||
// - scale2 : Gain to be used for vector 2
|
|
||||||
// - right_shifts : Number of right bit shifts to be applied
|
|
||||||
// - vector_length : Number of elements in the input vectors
|
|
||||||
//
|
|
||||||
// Output:
|
|
||||||
// - out_vector : Output vector
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// WebRtcSpl_ReverseOrderMultArrayElements(...)
|
// WebRtcSpl_ReverseOrderMultArrayElements(...)
|
||||||
//
|
//
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Use of this source code is governed by a BSD-style license
|
* Use of this source code is governed by a BSD-style license
|
||||||
* that can be found in the LICENSE file in the root of the source
|
* that can be found in the LICENSE file in the root of the source
|
||||||
@ -17,9 +17,7 @@
|
|||||||
* WebRtcSpl_ScaleVector()
|
* WebRtcSpl_ScaleVector()
|
||||||
* WebRtcSpl_ScaleVectorWithSat()
|
* WebRtcSpl_ScaleVectorWithSat()
|
||||||
* WebRtcSpl_ScaleAndAddVectors()
|
* WebRtcSpl_ScaleAndAddVectors()
|
||||||
*
|
* WebRtcSpl_ScaleAndAddVectorsWithRound()
|
||||||
* The description header can be found in signal_processing_library.h
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "signal_processing_library.h"
|
#include "signal_processing_library.h"
|
||||||
@ -149,3 +147,30 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16 *in1, WebRtc_Word16 gain
|
|||||||
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(gain2, *in2ptr++, shift2);
|
+ (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(gain2, *in2ptr++, shift2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||||
|
// Scales two 16-bit vectors, adds them and right-shifts the sum with rounding:
//
//   out_vector[i] = ((in_vector1[i] * in_vector1_scale)
//                    + (in_vector2[i] * in_vector2_scale)
//                    + round_value) >> right_shifts,
//
// where round_value = (1 << right_shifts) >> 1.
//
// Input:
//      - in_vector1        : Input vector 1
//      - in_vector1_scale  : Gain to be used for vector 1
//      - in_vector2        : Input vector 2
//      - in_vector2_scale  : Gain to be used for vector 2
//      - right_shifts      : Number of right bit shifts to be applied; must be
//                            small enough that (1 << right_shifts) fits in an
//                            int (not range-checked from above)
//      - length            : Number of elements in the input vectors
//
// Output:
//      - out_vector        : Output vector
//
// Return value             : 0 if OK, -1 if (in_vector1 == NULL
//                            || in_vector2 == NULL || out_vector == NULL
//                            || length <= 0 || right_shifts < 0). Nothing is
//                            written to out_vector when -1 is returned.
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
                                          int16_t in_vector1_scale,
                                          const int16_t* in_vector2,
                                          int16_t in_vector2_scale,
                                          int right_shifts,
                                          int16_t* out_vector,
                                          int length) {
  int i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length <= 0 || right_shifts < 0) {
    return -1;
  }

  // Derived only after validation: shifting by a negative count is undefined
  // behaviour, so this must not run on the error path.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    // Each 16 x 16 product is formed in 32 bits before the two are summed.
    int32_t scaled1 = (int32_t)in_vector1[i] * in_vector1_scale;
    int32_t scaled2 = (int32_t)in_vector2[i] * in_vector2_scale;

    out_vector[i] = (int16_t)((scaled1 + scaled2 + round_value)
                              >> right_shifts);
  }

  return 0;
}
|
||||||
|
#endif
|
||||||
|
@ -0,0 +1,88 @@
|
|||||||
|
@
|
||||||
|
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
|
@
|
||||||
|
@ Use of this source code is governed by a BSD-style license
|
||||||
|
@ that can be found in the LICENSE file in the root of the source
|
||||||
|
@ tree. An additional intellectual property rights grant can be found
|
||||||
|
@ in the file PATENTS. All contributing project authors may
|
||||||
|
@ be found in the AUTHORS file in the root of the source tree.
|
||||||
|
@
|
||||||
|
|
||||||
|
@ vector_scaling_operations_neon.s
|
||||||
|
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
|
||||||
|
@ optimized for ARM Neon platform. Output is bit-exact with the reference
|
||||||
|
@ C code in vector_scaling_operations.c.
|
||||||
|
|
||||||
|
.arch armv7-a
|
||||||
|
.fpu neon
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
.global WebRtcSpl_ScaleAndAddVectorsWithRound
|
||||||
|
|
||||||
|
@ int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
@                                           int16_t in_vector1_scale,
@                                           const int16_t* in_vector2,
@                                           int16_t in_vector2_scale,
@                                           int right_shifts,
@                                           int16_t* out_vector,
@                                           int length);
@
@ Register/stack arguments on entry:
@   r0: in_vector1, r1: in_vector1_scale, r2: in_vector2, r3: in_vector2_scale
@   stack: right_shifts, out_vector, length (in that order)
@
@ Returns 0 in r0 on success, or -1 if a pointer is NULL, length <= 0 or
@ right_shifts < 0, matching the C implementation's contract.
WebRtcSpl_ScaleAndAddVectorsWithRound:
.fnstart

  push {r4-r9}                      @ 6 registers: stack arguments start at sp+24.

  ldr r4, [sp, #32]                 @ length
  ldr r5, [sp, #28]                 @ out_vector
  ldr r6, [sp, #24]                 @ right_shifts (an int, so load the full word)

  @ Argument validation, same conditions as the C implementation.
  cmp r0, #0
  beq RETURN_ERROR                  @ in_vector1 == NULL
  cmp r2, #0
  beq RETURN_ERROR                  @ in_vector2 == NULL
  cmp r5, #0
  beq RETURN_ERROR                  @ out_vector == NULL
  cmp r4, #0
  ble RETURN_ERROR                  @ length <= 0
  cmp r6, #0
  blt RETURN_ERROR                  @ right_shifts < 0

  cmp r4, #8
  blt SET_ROUND_VALUE               @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1                   @ in_vector1_scale
  vdup.16 d27, r3                   @ in_vector2_scale

  @ Neon instructions can only right shift by an immediate value. To shift right
  @ by a register value, we have to do a left shift by the negative value.
  rsb r7, r6, #0
  vdup.16 q12, r7                   @ -right_shifts

  bic r7, r4, #7                    @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!         @ in_vector1[]
  vld1.16 {d30, d31}, [r2]!         @ in_vector2[]
  vmull.s16 q0, d28, d26
  vmull.s16 q1, d29, d26
  vmull.s16 q2, d30, d27
  vmull.s16 q3, d31, d27
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12                 @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0                  @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!
  bgt LOOP_UNROLLED_BY_8

  @ Only the low three bits of length are left over after the unrolled loop.
  @ Masking with a wider value would re-process elements and overrun the buffers.
  ands r4, #7                       @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                        @ round_value = (1 << right_shifts) >> 1

LOOP_NO_UNROLLING:
  ldrh  r7, [r0], #2                @ in_vector1[i]
  ldrh  r8, [r2], #2                @ in_vector2[i]
  smulbb r7, r7, r1                 @ Signed multiply of the bottom halfwords.
  smulbb r8, r8, r3
  subs r4, #1
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2
  bne LOOP_NO_UNROLLING

END:
  mov r0, #0                        @ Return 0: success.
  pop {r4-r9}
  bx  lr

RETURN_ERROR:
  mvn r0, #0                        @ Return -1: invalid argument.
  pop {r4-r9}
  bx  lr

.fnend
|
Loading…
x
Reference in New Issue
Block a user