Refactored and further optimized WebRtcSpl_MaxAbsValueW16() function in splib.

Review URL: https://webrtc-codereview.appspot.com/395008 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1820 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-03-01 20:03:26 +00:00
parent c9a3b81fd2
commit beb1851c2a
5 changed files with 100 additions and 72 deletions
--- a/src/common_audio/signal_processing/Android.mk
+++ b/src/common_audio/signal_processing/Android.mk
@@ -55,9 +55,9 @@ LOCAL_C_INCLUDES := \

 ifeq ($(ARCH_ARM_HAVE_NEON),true)
 LOCAL_SRC_FILES += \
-    min_max_operations_neon.c \
    cross_correlation_neon.s \
-    downsample_fast_neon.s
+    downsample_fast_neon.s \
+    min_max_operations_neon.s
 LOCAL_CFLAGS += \
    $(MY_ARM_CFLAGS_NEON)
 else
--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@@ -211,9 +211,17 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
 // End: Copy and set operations.

 // Minimum and maximum operations. Implementation in min_max_operations.c.
-// Descriptions at bottom of file.
-WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
-                                       WebRtc_Word16 length);
+
+// Returns the largest absolute value in a signed 16-bit vector.
+//
+// Input:
+//      - vector :   Input vector.
+//      - length :   Number of samples in vector.
+//
+// Return value  :   Maximum absolute value in vector, saturated to 32767
+//                   (so a sample of -32768 yields 32767). Returns -1 if
+//                   length <= 0.
+
+int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
+
 WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
                                       WebRtc_Word16 length);
 WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
@@ -855,8 +863,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 // Return value         : Maximum sample value in vector
 //

-//
-// WebRtcSpl_MaxAbsValueW16(...)
 // WebRtcSpl_MaxAbsValueW32(...)
 //
 // Returns the largest absolute value of a vector
--- a/src/common_audio/signal_processing/min_max_operations.c
+++ b/src/common_audio/signal_processing/min_max_operations.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@@ -28,28 +28,30 @@

 #include "signal_processing_library.h"

+#include <stdlib.h>
+
 #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))

 // Maximum absolute value of word16 vector.
-WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16 *vector, WebRtc_Word16 length)
-{
-    WebRtc_Word32 tempMax = 0;
-    WebRtc_Word32 absVal;
-    WebRtc_Word16 totMax;
-    int i;
-    G_CONST WebRtc_Word16 *tmpvector = vector;
+int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
+  int i = 0;
+  int absolute = 0;
+  int maximum = -1;  // Stays -1 (the return value) when length <= 0, since the loop never runs.

-    for (i = 0; i < length; i++)
-    {
-        absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
-        if (absVal > tempMax)
-        {
-            tempMax = absVal;
+  for (i = 0; i < length; i++) {
+    absolute = abs((int)vector[i]);  // Sample is widened to int first, so abs(-32768) is 32768.
+
+    if (absolute > maximum) {
+      maximum = absolute;
    }
-        tmpvector++;
  }
-    totMax = (WebRtc_Word16)WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD16_MAX);
-    return totMax;
+
+  // abs(-32768) gives 32768, which does not fit in int16_t: saturate it.
+  if (maximum > WEBRTC_SPL_WORD16_MAX) {
+    maximum = WEBRTC_SPL_WORD16_MAX;
+  }
+
+  return (int16_t)maximum;  // Either -1 or a value in [0, WEBRTC_SPL_WORD16_MAX].
 }

 #endif
--- a/src/common_audio/signal_processing/min_max_operations_neon.c
+++ b/src/common_audio/signal_processing/min_max_operations_neon.c
@@ -1,47 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#if (defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
-
-#include <arm_neon.h>
-
-#include "signal_processing_library.h"
-
-// Maximum absolute value of word16 vector.
-WebRtc_Word16 WebRtcSpl_MaxAbsValueW16(const WebRtc_Word16* vector,
-                                       WebRtc_Word16 length) {
-  WebRtc_Word32 temp_max = 0;
-  WebRtc_Word32 abs_val;
-  WebRtc_Word16 tot_max;
-  int i;
-
-  __asm__("vmov.i16 d25, #0" : : : "d25");
-
-  for (i = 0; i < length - 7; i += 8) {
-    __asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&vector[i]) : "q13");
-    __asm__("vabs.s16 q13, q13" : : : "q13");
-    __asm__("vpmax.s16 d26, d27" : : : "q13");
-    __asm__("vpmax.s16 d25, d26" : : : "d25", "d26");
-  }
-  __asm__("vpmax.s16 d25, d25" : : : "d25");
-  __asm__("vpmax.s16 d25, d25" : : : "d25");
-  __asm__("vmov.s16 %0, d25[0]" : "=r"(temp_max): : "d25");
-
-  for (; i < length; i++) {
-    abs_val = WEBRTC_SPL_ABS_W32((vector[i]));
-    if (abs_val > temp_max) {
-      temp_max = abs_val;
-    }
-  }
-  tot_max = (WebRtc_Word16)WEBRTC_SPL_MIN(temp_max, WEBRTC_SPL_WORD16_MAX);
-  return tot_max;
-}
-
-#endif
--- a/src/common_audio/signal_processing/min_max_operations_neon.s
+++ b/src/common_audio/signal_processing/min_max_operations_neon.s
@@ -0,0 +1,67 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
+@ ARM Neon platform. The description header can be found in
+@ signal_processing_library.h
+@
+@ The reference C code is in file min_max_operations.c. Code here is basically
+@ a loop unrolling by 8 with Neon instructions. Bit-exact.
+
+.arch armv7-a
+.fpu neon
+.global WebRtcSpl_MaxAbsValueW16
+.type   WebRtcSpl_MaxAbsValueW16, %function  @ Function symbol, so ELF linkers
+                                             @ can interwork with Thumb callers.
+.align  2
+
+@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
+@
+@ In:       r0 = vector (pointer to 16-bit samples), r1 = length (in samples).
+@ Out:      r0 = largest absolute value, saturated to 32767;
+@                -1 if length <= 0.
+@ Clobbers: r1-r3, r12, d24-d27 (q12, q13), flags.
+@ Stack:    none (leaf function).
+WebRtcSpl_MaxAbsValueW16:
+.fnstart
+
+  vmov.i16 q12, #0            @ q12 = eight u16 lanes of running maxima.
+  mov r2, #-1                 @ r2 = result; stays -1 if length <= 0.
+  cmp r1, #0                  @ length
+  ble .Lend                   @ Return -1 if length <= 0.
+  cmp r1, #7
+  ble .Lscalar_loop           @ Fewer than 8 samples: scalar path only.
+
+  lsr r3, r1, #3
+  lsl r3, #3                  @ r3 = length / 8 * 8, samples for the Neon loop.
+  sub r1, r3                  @ r1 = length % 8, samples for the scalar loop.
+
+.Lneon_loop:                  @ Invariant: r3 > 0 and a multiple of 8.
+  vld1.16 {d26, d27}, [r0]!   @ Load 8 samples and advance r0 past them.
+  subs r3, #8
+  vabs.s16 q13, q13           @ Note vabs doesn't change the value of -32768.
+  vmax.u16 q12, q13           @ Unsigned max, so 0x8000 (32768) is not lost.
+  bne .Lneon_loop
+
+  @ Reduce the eight lanes of q12 to a single maximum in d24[0], then move it
+  @ to r2.
+  vmax.u16 d24, d25
+  vpmax.u16 d24, d24
+  vpmax.u16 d24, d24
+  cmp r1, #0                  @ Any tail samples left?
+  vmov.u16 r2, d24[0]         @ Zero-extending move; leaves the flags alone.
+  ble .Lend
+
+.Lscalar_loop:                @ Invariant: r1 > 0 samples remain.
+  ldrsh r3, [r0], #2          @ r3 = sign-extended sample; r0 += 2.
+  eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
+  sub r12, r12, r3, asr #31   @ r12 = |r3|, in 0..32768.
+  cmp r2, r12
+  movlt r2, r12               @ Signed compare, so the initial -1 always loses.
+  subs r1, #1
+  bne .Lscalar_loop
+
+.Lend:
+  cmp r2, #0x8000             @ Only |-32768| can produce 0x8000 ...
+  subeq r2, #1                @ ... saturate it to 32767.
+  mov r0, r2
+  bx  lr
+
+.fnend
+.size WebRtcSpl_MaxAbsValueW16, . - WebRtcSpl_MaxAbsValueW16
+
+@ Tell GNU-style linkers that this object does not need an executable stack.
+.section .note.GNU-stack, "", %progbits