Refactored and further optimized WebRtcSpl_MaxAbsValueW16() function in splib.

Review URL: https://webrtc-codereview.appspot.com/395008

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1820 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org
2012-03-01 20:03:26 +00:00
parent c9a3b81fd2
commit beb1851c2a
5 changed files with 100 additions and 72 deletions

View File

@@ -55,9 +55,9 @@ LOCAL_C_INCLUDES := \
ifeq ($(ARCH_ARM_HAVE_NEON),true) ifeq ($(ARCH_ARM_HAVE_NEON),true)
LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \
min_max_operations_neon.c \
cross_correlation_neon.s \ cross_correlation_neon.s \
downsample_fast_neon.s downsample_fast_neon.s \
min_max_operations_neon.s
LOCAL_CFLAGS += \ LOCAL_CFLAGS += \
$(MY_ARM_CFLAGS_NEON) $(MY_ARM_CFLAGS_NEON)
else else

View File

@@ -211,9 +211,17 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
// End: Copy and set operations. // End: Copy and set operations.
// Minimum and maximum operations. Implementation in min_max_operations.c. // Minimum and maximum operations. Implementation in min_max_operations.c.
// Descriptions at bottom of file.
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector, // Returns the largest absolute value in a signed 16-bit vector.
WebRtc_Word16 length); //
// Input:
// - vector : Input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector.
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length); WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
@@ -855,8 +863,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// Return value : Maximum sample value in vector // Return value : Maximum sample value in vector
// //
//
// WebRtcSpl_MaxAbsValueW16(...)
// WebRtcSpl_MaxAbsValueW32(...) // WebRtcSpl_MaxAbsValueW32(...)
// //
// Returns the largest absolute value of a vector // Returns the largest absolute value of a vector

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
* *
* Use of this source code is governed by a BSD-style license * Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source * that can be found in the LICENSE file in the root of the source
@@ -28,28 +28,30 @@
#include "signal_processing_library.h" #include "signal_processing_library.h"
#include <stdlib.h>
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
// Maximum absolute value of word16 vector. // Maximum absolute value of word16 vector.
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16 *vector, WebRtc_Word16 length) int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
{ int i = 0;
WebRtc_Word32 tempMax = 0; int absolute = 0;
WebRtc_Word32 absVal; int maximum = -1; // Return -1 if length <= 0.
WebRtc_Word16 totMax;
int i;
G_CONST WebRtc_Word16 *tmpvector = vector;
for (i = 0; i < length; i++) for (i = 0; i < length; i++) {
{ absolute = abs((int)vector[i]);
absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
if (absVal > tempMax) if (absolute > maximum) {
{ maximum = absolute;
tempMax = absVal;
} }
tmpvector++;
} }
totMax = (WebRtc_Word16)WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD16_MAX);
return totMax; // Guard the case for abs(-32768).
if (maximum > WEBRTC_SPL_WORD16_MAX) {
maximum = WEBRTC_SPL_WORD16_MAX;
}
return (int16_t)maximum;
} }
#endif #endif

View File

@@ -1,47 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#if (defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
#include <arm_neon.h>
#include "signal_processing_library.h"
// Returns the largest absolute value in a word16 vector, saturated to
// WEBRTC_SPL_WORD16_MAX. Returns 0 when length <= 0.
//
// Eight samples are processed per iteration with Neon intrinsics; the
// remaining (length % 8) samples are handled by a scalar loop.
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
                                       WebRtc_Word16 length) {
  WebRtc_Word32 temp_max = 0;
  WebRtc_Word32 abs_val;
  WebRtc_Word16 tot_max;
  uint16x8_t max_q = vdupq_n_u16(0);  // Eight running maxima, one per lane.
  uint16x4_t max_d;
  int i;

  for (i = 0; i < length - 7; i += 8) {
    int16x8_t samples = vld1q_s16(&vector[i]);
    // vabsq_s16() leaves -32768 unchanged (0x8000). Comparing the lanes as
    // unsigned makes that pattern rank as 32768 instead of being discarded
    // as a negative number.
    max_q = vmaxq_u16(max_q, vreinterpretq_u16_s16(vabsq_s16(samples)));
  }

  // Horizontal reduction: 8 lanes -> 4 -> 2 -> 1; the result ends in lane 0.
  max_d = vmax_u16(vget_low_u16(max_q), vget_high_u16(max_q));
  max_d = vpmax_u16(max_d, max_d);
  max_d = vpmax_u16(max_d, max_d);
  temp_max = (WebRtc_Word32)vget_lane_u16(max_d, 0);

  // Scalar tail for the samples that did not fill a whole vector.
  for (; i < length; i++) {
    abs_val = WEBRTC_SPL_ABS_W32((vector[i]));
    if (abs_val > temp_max) {
      temp_max = abs_val;
    }
  }

  // abs(-32768) does not fit in 16 bits; clamp it to WEBRTC_SPL_WORD16_MAX.
  tot_max = (WebRtc_Word16)WEBRTC_SPL_MIN(temp_max, WEBRTC_SPL_WORD16_MAX);
  return tot_max;
}
#endif

View File

@@ -0,0 +1,67 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
@ ARM Neon platform. The description header can be found in
@ signal_processing_library.h
@
@ The reference C code is in min_max_operations.c. This version processes
@ eight samples per iteration with Neon instructions and is bit-exact with it.
@-----------------------------------------------------------------------
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length)
@
@ Returns the largest absolute value found in vector, saturated to 32767
@ (so that abs(-32768) fits in 16 bits). Returns -1 when length <= 0.
@
@ Syntax: GNU as, ARM (A32) encoding.
@ In:     r0 = vector, r1 = length
@ Out:    r0 = result
@ Clobb:  r1, r2, r3, r12, q12 (d24-d25), q13 (d26-d27), flags
@ Stack:  not used (leaf function)
@-----------------------------------------------------------------------
        .arch   armv7-a
        .fpu    neon
        .text
        .global WebRtcSpl_MaxAbsValueW16
        @ Mark the symbol as a function so the linker can apply ARM/Thumb
        @ interworking when it is called from Thumb code.
        .type   WebRtcSpl_MaxAbsValueW16, %function
        .align  2
WebRtcSpl_MaxAbsValueW16:
        .fnstart
        vmov.i16 q12, #0                @ q12 = eight running u16 maxima.
        mov     r2, #-1                 @ r2 = running maximum; -1 if no samples.
        cmp     r1, #0
        ble     .Ldone                  @ length <= 0: return -1.
        cmp     r1, #7
        ble     .Lscalar_loop           @ Fewer than 8 samples: scalar path only.
        bic     r3, r1, #7              @ r3 = length / 8 * 8 (vector loop counter).
        sub     r1, r1, r3              @ r1 = length % 8 (scalar loop counter).

.Lvector_loop:
        vld1.16 {d26, d27}, [r0]!       @ Load 8 samples, advance the pointer.
        subs    r3, r3, #8              @ Flags stay valid across the Neon ops below.
        vabs.s16 q13, q13               @ Note vabs leaves -32768 as 0x8000.
        vmax.u16 q12, q12, q13          @ Unsigned, so 0x8000 ranks as 32768.
        bne     .Lvector_loop

        @ Reduce the eight lane maxima to one value in lane 0 and copy it to r2.
        vmax.u16 d24, d24, d25
        vpmax.u16 d24, d24, d24
        vpmax.u16 d24, d24, d24
        cmp     r1, #0
        vmov.u16 r2, d24[0]             @ Zero-extended; does not touch the flags.
        ble     .Ldone                  @ No tail samples left.

.Lscalar_loop:
        ldrsh   r3, [r0], #2            @ r3 = *vector++ (sign-extended).
        eor     r12, r3, r3, asr #31    @ eor and then sub with the sign mask
        sub     r12, r12, r3, asr #31   @ gives r12 = abs(r3).
        cmp     r2, r12
        movlt   r2, r12                 @ r2 = max(r2, r12).
        subs    r1, r1, #1
        bne     .Lscalar_loop

.Ldone:
        cmp     r2, #0x8000             @ 0x8000 can only come from abs(-32768);
        subeq   r2, r2, #1              @ saturate it to 32767.
        mov     r0, r2
        bx      lr
        .fnend
        .size   WebRtcSpl_MaxAbsValueW16, .-WebRtcSpl_MaxAbsValueW16