Refactored and further optimized the WebRtcSpl_MaxAbsValueW16() function in the signal processing library (splib).
Review URL: https://webrtc-codereview.appspot.com/395008 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1820 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -55,9 +55,9 @@ LOCAL_C_INCLUDES := \
|
|||||||
|
|
||||||
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||||
LOCAL_SRC_FILES += \
|
LOCAL_SRC_FILES += \
|
||||||
min_max_operations_neon.c \
|
|
||||||
cross_correlation_neon.s \
|
cross_correlation_neon.s \
|
||||||
downsample_fast_neon.s
|
downsample_fast_neon.s \
|
||||||
|
min_max_operations_neon.s
|
||||||
LOCAL_CFLAGS += \
|
LOCAL_CFLAGS += \
|
||||||
$(MY_ARM_CFLAGS_NEON)
|
$(MY_ARM_CFLAGS_NEON)
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -211,9 +211,17 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
|
|||||||
// End: Copy and set operations.
|
// End: Copy and set operations.
|
||||||
|
|
||||||
// Minimum and maximum operations. Implementation in min_max_operations.c.
|
// Minimum and maximum operations. Implementation in min_max_operations.c.
|
||||||
// Descriptions at bottom of file.
|
|
||||||
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
|
// Returns the largest absolute value in a signed 16-bit vector.
|
||||||
WebRtc_Word16 length);
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : Input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Maximum absolute value in vector.
|
||||||
|
|
||||||
|
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
|
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
|
||||||
WebRtc_Word16 length);
|
WebRtc_Word16 length);
|
||||||
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
|
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
|
||||||
@@ -855,8 +863,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
|
|||||||
// Return value : Maximum sample value in vector
|
// Return value : Maximum sample value in vector
|
||||||
//
|
//
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_MaxAbsValueW16(...)
|
|
||||||
// WebRtcSpl_MaxAbsValueW32(...)
|
// WebRtcSpl_MaxAbsValueW32(...)
|
||||||
//
|
//
|
||||||
// Returns the largest absolute value of a vector
|
// Returns the largest absolute value of a vector
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Use of this source code is governed by a BSD-style license
|
* Use of this source code is governed by a BSD-style license
|
||||||
* that can be found in the LICENSE file in the root of the source
|
* that can be found in the LICENSE file in the root of the source
|
||||||
@@ -28,28 +28,30 @@
|
|||||||
|
|
||||||
#include "signal_processing_library.h"
|
#include "signal_processing_library.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||||
|
|
||||||
// Maximum absolute value of word16 vector.
|
// Maximum absolute value of word16 vector.
|
||||||
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16 *vector, WebRtc_Word16 length)
|
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
|
||||||
{
|
int i = 0;
|
||||||
WebRtc_Word32 tempMax = 0;
|
int absolute = 0;
|
||||||
WebRtc_Word32 absVal;
|
int maximum = -1; // Return -1 if length <= 0.
|
||||||
WebRtc_Word16 totMax;
|
|
||||||
int i;
|
|
||||||
G_CONST WebRtc_Word16 *tmpvector = vector;
|
|
||||||
|
|
||||||
for (i = 0; i < length; i++)
|
for (i = 0; i < length; i++) {
|
||||||
{
|
absolute = abs((int)vector[i]);
|
||||||
absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
|
|
||||||
if (absVal > tempMax)
|
if (absolute > maximum) {
|
||||||
{
|
maximum = absolute;
|
||||||
tempMax = absVal;
|
|
||||||
}
|
}
|
||||||
tmpvector++;
|
|
||||||
}
|
}
|
||||||
totMax = (WebRtc_Word16)WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD16_MAX);
|
|
||||||
return totMax;
|
// Guard the case for abs(-32768).
|
||||||
|
if (maximum > WEBRTC_SPL_WORD16_MAX) {
|
||||||
|
maximum = WEBRTC_SPL_WORD16_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int16_t)maximum;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,47 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if (defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
|
||||||
|
|
||||||
#include <arm_neon.h>
|
|
||||||
|
|
||||||
#include "signal_processing_library.h"
|
|
||||||
|
|
||||||
// Maximum absolute value of word16 vector.
|
|
||||||
WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
|
|
||||||
WebRtc_Word16 length) {
|
|
||||||
WebRtc_Word32 temp_max = 0;
|
|
||||||
WebRtc_Word32 abs_val;
|
|
||||||
WebRtc_Word16 tot_max;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
__asm__("vmov.i16 d25, #0" : : : "d25");
|
|
||||||
|
|
||||||
for (i = 0; i < length - 7; i += 8) {
|
|
||||||
__asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&vector[i]) : "q13");
|
|
||||||
__asm__("vabs.s16 q13, q13" : : : "q13");
|
|
||||||
__asm__("vpmax.s16 d26, d27" : : : "q13");
|
|
||||||
__asm__("vpmax.s16 d25, d26" : : : "d25", "d26");
|
|
||||||
}
|
|
||||||
__asm__("vpmax.s16 d25, d25" : : : "d25");
|
|
||||||
__asm__("vpmax.s16 d25, d25" : : : "d25");
|
|
||||||
__asm__("vmov.s16 %0, d25[0]" : "=r"(temp_max): : "d25");
|
|
||||||
|
|
||||||
for (; i < length; i++) {
|
|
||||||
abs_val = WEBRTC_SPL_ABS_W32((vector[i]));
|
|
||||||
if (abs_val > temp_max) {
|
|
||||||
temp_max = abs_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tot_max = (WebRtc_Word16)WEBRTC_SPL_MIN(temp_max, WEBRTC_SPL_WORD16_MAX);
|
|
||||||
return tot_max;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
67
src/common_audio/signal_processing/min_max_operations_neon.s
Normal file
67
src/common_audio/signal_processing/min_max_operations_neon.s
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
@
|
||||||
|
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
|
@
|
||||||
|
@ Use of this source code is governed by a BSD-style license
|
||||||
|
@ that can be found in the LICENSE file in the root of the source
|
||||||
|
@ tree. An additional intellectual property rights grant can be found
|
||||||
|
@ in the file PATENTS. All contributing project authors may
|
||||||
|
@ be found in the AUTHORS file in the root of the source tree.
|
||||||
|
@
|
||||||
|
|
||||||
|
@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
|
||||||
|
@ ARM Neon platform. The description header can be found in
|
||||||
|
@ signal_processing_library.h
|
||||||
|
@
|
||||||
|
@ The reference C code is in file min_max_operations.c. Code here is basically
|
||||||
|
@ a loop unrolling by 8 with Neon instructions. Bit-exact.
|
||||||
|
|
||||||
|
@ -----------------------------------------------------------------------------
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
@
@ Syntax:   GNU as, ARM (A32) instruction set, NEON required.
@ In:       r0 = vector  (pointer to signed 16-bit samples)
@           r1 = length  (number of samples)
@ Out:      r0 = largest absolute value found, clamped to 32767;
@                -1 when length <= 0.
@ Clobbers: r1, r2, r3, q12, q13 (d24-d27), condition flags.
@ Stack:    not used (leaf function).
@
@ Register roles inside the function:
@   r2  = running maximum (starts at -1 so any sample replaces it)
@   r3  = sample counter for the 8-wide loop, then scratch in the tail loop
@   q12 = per-lane running maximum of |sample| in the 8-wide loop
@   q13 = the eight samples currently being processed
@ -----------------------------------------------------------------------------

        .arch   armv7-a
        .fpu    neon

        @ |-32768| as an unsigned 16-bit value; it does not fit in int16_t.
        .equ    ABS_WORD16_MIN, 0x8000

        .text
        @ The conditional instructions below carry no IT blocks, so pin the
        @ instruction set to A32 instead of relying on the assembler default.
        .arm
        .align  2
        .global WebRtcSpl_MaxAbsValueW16
        @ Tag the symbol as a function so the linker and debug/profiling tools
        @ treat it as code (needed for ARM/Thumb interworking at call sites).
        .type   WebRtcSpl_MaxAbsValueW16, %function

WebRtcSpl_MaxAbsValueW16:
        .fnstart

        mov     r2, #-1                 @ maximum = -1
        cmp     r1, #0
        ble     .Lmax_abs_done          @ length <= 0: return -1
        cmp     r1, #7
        ble     .Lmax_abs_tail          @ fewer than 8 samples: scalar only

        vmov.i16 q12, #0                @ per-lane maxima start at 0
        bic     r3, r1, #7              @ r3 = length rounded down to a multiple of 8
        and     r1, r1, #7              @ r1 = leftover samples (length % 8)

        @ Invariant: r3 > 0 and a multiple of 8; r0 points at the next sample.
.Lmax_abs_by_8:
        vld1.16 {d26, d27}, [r0]!       @ q13 = next 8 samples; r0 += 16 bytes
        subs    r3, r3, #8
        vabs.s16 q13, q13               @ -32768 stays 0x8000 (no saturation)
        vmax.u16 q12, q12, q13          @ unsigned, so 0x8000 ranks as 32768
        bne     .Lmax_abs_by_8

        @ Reduce the eight lane maxima to a single value in r2.
        vmax.u16 d24, d24, d25          @ 8 lanes -> 4
        vpmax.u16 d24, d24, d24         @ 4 lanes -> 2
        vpmax.u16 d24, d24, d24         @ 2 lanes -> 1 (lane 0)
        vmov.u16 r2, d24[0]             @ zero-extended, so 0x8000 -> 32768
        cmp     r1, #0
        beq     .Lmax_abs_done          @ no leftover samples

        @ Invariant: r1 > 0 samples remain; r0 points at the next sample.
.Lmax_abs_tail:
        ldrsh   r3, [r0], #2            @ r3 = (int)*vector++
        cmp     r3, #0
        rsblt   r3, r3, #0              @ r3 = |r3| (32768 for -32768)
        cmp     r2, r3
        movlt   r2, r3                  @ maximum = max(maximum, |sample|)
        subs    r1, r1, #1
        bne     .Lmax_abs_tail

.Lmax_abs_done:
        cmp     r2, #ABS_WORD16_MIN     @ only -32768 can produce 32768 ...
        subeq   r2, r2, #1              @ ... which is clamped to 32767
        mov     r0, r2
        bx      lr

        .fnend
        .size   WebRtcSpl_MaxAbsValueW16, .-WebRtcSpl_MaxAbsValueW16
|
||||||
Reference in New Issue
Block a user