Refactored and further optimized WebRtcSpl_MaxAbsValueW16() function in splib.
Review URL: https://webrtc-codereview.appspot.com/395008 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1820 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -55,9 +55,9 @@ LOCAL_C_INCLUDES := \
|
||||
|
||||
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||
LOCAL_SRC_FILES += \
|
||||
min_max_operations_neon.c \
|
||||
cross_correlation_neon.s \
|
||||
downsample_fast_neon.s
|
||||
downsample_fast_neon.s \
|
||||
min_max_operations_neon.s
|
||||
LOCAL_CFLAGS += \
|
||||
$(MY_ARM_CFLAGS_NEON)
|
||||
else
|
||||
|
||||
@@ -211,9 +211,17 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
|
||||
// End: Copy and set operations.
|
||||
|
||||
// Minimum and maximum operations. Implementation in min_max_operations.c.
|
||||
// Descriptions at bottom of file.
|
||||
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
|
||||
WebRtc_Word16 length);
|
||||
|
||||
// Returns the largest absolute value in a signed 16-bit vector.
|
||||
//
|
||||
// Input:
|
||||
// - vector : Input vector.
|
||||
// - length : Number of samples in vector.
|
||||
//
|
||||
// Return value : Maximum absolute value in vector, saturated to 32767 (the result for -32768); -1 if length <= 0.
|
||||
|
||||
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
|
||||
|
||||
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
|
||||
WebRtc_Word16 length);
|
||||
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
|
||||
@@ -855,8 +863,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
|
||||
// Return value : Maximum sample value in vector
|
||||
//
|
||||
|
||||
//
|
||||
// WebRtcSpl_MaxAbsValueW16(...)
|
||||
// WebRtcSpl_MaxAbsValueW32(...)
|
||||
//
|
||||
// Returns the largest absolute value of a vector
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
@@ -28,28 +28,30 @@
|
||||
|
||||
#include "signal_processing_library.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||
|
||||
// Maximum absolute value of word16 vector.
|
||||
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16 *vector, WebRtc_Word16 length)
|
||||
{
|
||||
WebRtc_Word32 tempMax = 0;
|
||||
WebRtc_Word32 absVal;
|
||||
WebRtc_Word16 totMax;
|
||||
int i;
|
||||
G_CONST WebRtc_Word16 *tmpvector = vector;
|
||||
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
|
||||
int i = 0;
|
||||
int absolute = 0;
|
||||
int maximum = -1; // Return -1 if length <= 0.
|
||||
|
||||
for (i = 0; i < length; i++)
|
||||
{
|
||||
absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
|
||||
if (absVal > tempMax)
|
||||
{
|
||||
tempMax = absVal;
|
||||
for (i = 0; i < length; i++) {
|
||||
absolute = abs((int)vector[i]);
|
||||
|
||||
if (absolute > maximum) {
|
||||
maximum = absolute;
|
||||
}
|
||||
tmpvector++;
|
||||
}
|
||||
totMax = (WebRtc_Word16)WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD16_MAX);
|
||||
return totMax;
|
||||
|
||||
// Guard the case for abs(-32768).
|
||||
if (maximum > WEBRTC_SPL_WORD16_MAX) {
|
||||
maximum = WEBRTC_SPL_WORD16_MAX;
|
||||
}
|
||||
|
||||
return (int16_t)maximum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#if (defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "signal_processing_library.h"
|
||||
|
||||
// Maximum absolute value of word16 vector.
//
// Eight samples are processed per iteration with NEON intrinsics; the
// remaining (length % 8) samples are handled by a scalar loop. The result is
// saturated to WEBRTC_SPL_WORD16_MAX, which is what abs(-32768) maps to.
// Returns 0 when length <= 0.
//
// Intrinsics are used instead of a chain of separate __asm__ statements so
// that the compiler owns the NEON register allocation; nothing guarantees
// that hard-coded d/q registers survive between independent asm statements.
//
// Input:
//      - vector : Input vector.
//      - length : Number of samples in vector.
//
// Return value  : Maximum absolute value in vector.
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
                                       WebRtc_Word16 length) {
  WebRtc_Word32 temp_max = 0;
  WebRtc_Word32 abs_val;
  WebRtc_Word16 tot_max;
  uint16x8_t max_u16x8 = vdupq_n_u16(0);  // Eight running maxima.
  uint16x4_t max_u16x4;
  int16x8_t samples;
  int i;

  for (i = 0; i < length - 7; i += 8) {
    samples = vld1q_s16(&vector[i]);
    // vabs leaves -32768 as the bit pattern 0x8000. Comparing as unsigned
    // keeps it as the largest value (32768) instead of the smallest.
    max_u16x8 = vmaxq_u16(max_u16x8,
                          vreinterpretq_u16_s16(vabsq_s16(samples)));
  }

  // Reduce the eight lanes to a single maximum in lane 0.
  max_u16x4 = vmax_u16(vget_low_u16(max_u16x8), vget_high_u16(max_u16x8));
  max_u16x4 = vpmax_u16(max_u16x4, max_u16x4);
  max_u16x4 = vpmax_u16(max_u16x4, max_u16x4);
  temp_max = (WebRtc_Word32)vget_lane_u16(max_u16x4, 0);  // 0 .. 32768.

  // Scalar tail for the samples that did not fill a whole vector.
  for (; i < length; i++) {
    abs_val = WEBRTC_SPL_ABS_W32((vector[i]));
    if (abs_val > temp_max) {
      temp_max = abs_val;
    }
  }

  // Guard the case for abs(-32768), which does not fit in 16 bits.
  tot_max = (WebRtc_Word16)WEBRTC_SPL_MIN(temp_max, WEBRTC_SPL_WORD16_MAX);
  return tot_max;
}
|
||||
|
||||
#endif
|
||||
67
src/common_audio/signal_processing/min_max_operations_neon.s
Normal file
67
src/common_audio/signal_processing/min_max_operations_neon.s
Normal file
@@ -0,0 +1,67 @@
|
||||
@
|
||||
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
@
|
||||
|
||||
@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
|
||||
@ ARM Neon platform. The description header can be found in
|
||||
@ signal_processing_library.h
|
||||
@
|
||||
@ The reference C code is in file min_max_operations.c. Code here is basically
|
||||
@ a loop unrolling by 8 with Neon instructions. Bit-exact.
|
||||
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
.global WebRtcSpl_MaxAbsValueW16
|
||||
.align 2
|
||||
|
||||
@-----------------------------------------------------------------------
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length)
@
@ Returns the largest absolute value in a signed 16-bit vector, saturated
@ to 32767 (the result for -32768), or -1 when length <= 0.
@
@ In:       r0 = vector, r1 = length
@ Out:      r0 = result
@ Clobbers: r1, r2, r3, r12, d24-d27 (q12, q13), flags
@ Stack:    not used (leaf routine)
@-----------------------------------------------------------------------
        .type   WebRtcSpl_MaxAbsValueW16, %function  @ Mark as code so the linker can interwork.
WebRtcSpl_MaxAbsValueW16:
        .fnstart

        mov     r2, #-1                 @ r2 = running maximum; -1 is returned if length <= 0.
        cmp     r1, #0                  @ length
        ble     .Lmax_abs_w16_end       @ Return -1 if length <= 0.
        cmp     r1, #7
        ble     .Lmax_abs_w16_tail      @ Fewer than 8 samples: scalar loop only.

        vmov.i16 q12, #0                @ q12 = eight running unsigned maxima.
        bic     r3, r1, #7              @ r3 = length / 8 * 8, counter for the vector loop (>= 8 here).
        sub     r1, r1, r3              @ r1 = length % 8, counter for the scalar loop.

.Lmax_abs_w16_by_8:
        vld1.16 {d26, d27}, [r0]!       @ Load 8 samples and advance r0 past them.
        subs    r3, r3, #8
        vabs.s16 q13, q13               @ Note vabs doesn't change the value of -32768.
        vmax.u16 q12, q12, q13          @ Use u16 so we don't lose the value -32768.
        bne     .Lmax_abs_w16_by_8

        @ Reduce the eight lanes of q12 to one maximum and move it to r2.
        vmax.u16 d24, d24, d25
        vpmax.u16 d24, d24, d24
        vpmax.u16 d24, d24, d24
        cmp     r1, #0                  @ Any samples left for the scalar loop?
        vmov.u16 r2, d24[0]             @ Zero-extended, so r2 is in 0..0x8000.
        ble     .Lmax_abs_w16_end

.Lmax_abs_w16_tail:
        @ Invariant: r1 = samples remaining (> 0), r2 = maximum so far.
        ldrsh   r3, [r0], #2            @ r3 = *vector++ (sign-extended).
        eor     r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
        sub     r12, r12, r3, asr #31
        cmp     r2, r12
        movlt   r2, r12                 @ r2 = max(r2, abs(sample)).
        subs    r1, r1, #1
        bne     .Lmax_abs_w16_tail

.Lmax_abs_w16_end:
        cmp     r2, #0x8000             @ Guard against the case for -32768:
        subeq   r2, r2, #1              @ 32768 does not fit in int16_t, return 32767.
        mov     r0, r2
        bx      lr

        .fnend
        .size   WebRtcSpl_MaxAbsValueW16, . - WebRtcSpl_MaxAbsValueW16
|
||||
Reference in New Issue
Block a user