Optimizations on several SPL min max operations in ARM, and refactoring in C.

Touched C and assembly functions are tested with a new unit test which is not in the code base yet. Review URL: https://webrtc-codereview.appspot.com/428004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1974 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-04-02 03:55:20 +00:00 · 2012-04-02 03:55:20 +00:00 · 95c3d408f5
commit 95c3d408f5
parent f3bbc3e5b3
3 changed files with 544 additions and 300 deletions
--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@ -34,6 +34,8 @@
 #define WEBRTC_SPL_MAX_SEED_USED    0x80000000L
 #define WEBRTC_SPL_MIN(A, B)        (A < B ? A : B) // Get min value
 #define WEBRTC_SPL_MAX(A, B)        (A > B ? A : B) // Get max value
 // TODO(kma/bjorn): For the next two macros, investigate how to correct the code
 // for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
 #define WEBRTC_SPL_ABS_W16(a) \
    (((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
 #define WEBRTC_SPL_ABS_W32(a) \
@ -202,41 +204,130 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
                                     WebRtc_Word16 vector_length);
 // End: Copy and set operations.
 // Minimum and maximum operations. Implementation in min_max_operations.c.
 // Returns the largest absolute value in a signed 16-bit vector.
 //
 // Input:
-//      - vector :   Input vector.
+//      - vector : 16-bit input vector.
-//      - length :   Number of samples in vector.
+//      - length : Number of samples in vector.
 //
-// Return value  :   Maximum absolute value in vector.
+// Return value  : Maximum absolute value in vector;
-
+//                 or -1, if (vector == NULL || length <= 0).
 int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
-WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
+// Returns the largest absolute value in a signed 32-bit vector.
-                                       WebRtc_Word16 length);
+//
-WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
+// Input:
-                                    WebRtc_Word16 length);
+//      - vector : 32-bit input vector.
-WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector,
+//      - length : Number of samples in vector.
-                                    WebRtc_Word16 length);
+//
-WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector,
+// Return value  : Maximum absolute value in vector;
-                                    WebRtc_Word16 length);
+//                 or -1, if (vector == NULL || length <= 0).
 int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
 // Returns the maximum value of a 16-bit vector.
 //
 // Input:
 //      - vector : 16-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Maximum sample value in |vector|.
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
 //                 is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
 //                 value and we can't catch errors purely based on it.
 int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
 // Returns the maximum value of a 32-bit vector.
 //
 // Input:
 //      - vector : 32-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Maximum sample value in |vector|.
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
 //                 is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
 //                 value and we can't catch errors purely based on it.
 int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
 // Returns the minimum value of a 16-bit vector.
 //
 // Input:
 //      - vector : 16-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Minimum sample value in |vector|.
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
 //                 is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
 //                 value and we can't catch errors purely based on it.
 int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
 // Returns the minimum value of a 32-bit vector.
 //
 // Input:
 //      - vector : 32-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Minimum sample value in |vector|.
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
 //                 is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
 //                 value and we can't catch errors purely based on it.
 int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
 // Returns the vector index to the largest absolute value of a 16-bit vector.
 //
 // Input:
 //      - vector : 16-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Index to the maximum absolute value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
 int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length);
 // Returns the vector index to the maximum sample value of a 16-bit vector.
 //
 // Input:
 //      - vector : 16-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Index to the maximum value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
 int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length);
 // Returns the vector index to the maximum sample value of a 32-bit vector.
 //
 // Input:
 //      - vector : 32-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Index to the maximum value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
 int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length);
 // Returns the vector index to the minimum sample value of a 16-bit vector.
 //
 // Input:
 //      - vector : 16-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Index to the minimum value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
 int WebRtcSpl_MinIndexW16(const int16_t* vector, int length);
 // Returns the vector index to the minimum sample value of a 32-bit vector.
 //
 // Input:
 //      - vector : 32-bit input vector.
 //      - length : Number of samples in vector.
 //
 // Return value  : Index to the minimum value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
 int WebRtcSpl_MinIndexW32(const int32_t* vector, int length);
 WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector,
                                       WebRtc_Word16 length);
 WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector,
                                    WebRtc_Word16 length);
 WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector,
                                    WebRtc_Word16 length);
 WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector,
                                    WebRtc_Word16 length);
 WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector,
                                    WebRtc_Word16 length);
 WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector,
                                    WebRtc_Word16 length);
 // End: Minimum and maximum operations.
 // Vector scaling operations. Implementation in vector_scaling_operations.c.
 // Description at bottom of file.
 void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
@ -849,81 +940,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 // Return value         : Number of samples in vector
 //
 //
 // WebRtcSpl_MinValueW16(...)
 // WebRtcSpl_MinValueW32(...)
 //
 // Returns the minimum value of a vector
 //
 // Input:
 //      - vector        : Input vector
 //      - vector_length : Number of samples in vector
 //
 // Return value         : Minimum sample value in vector
 //
 //
 // WebRtcSpl_MaxValueW16(...)
 // WebRtcSpl_MaxValueW32(...)
 //
 // Returns the maximum value of a vector
 //
 // Input:
 //      - vector        : Input vector
 //      - vector_length : Number of samples in vector
 //
 // Return value         : Maximum sample value in vector
 //
 // WebRtcSpl_MaxAbsValueW32(...)
 //
 // Returns the largest absolute value of a vector
 //
 // Input:
 //      - vector        : Input vector
 //      - vector_length : Number of samples in vector
 //
 // Return value         : Maximum absolute value in vector
 //
 //
 // WebRtcSpl_MaxAbsIndexW16(...)
 //
 // Returns the vector index to the largest absolute value of a vector
 //
 // Input:
 //      - vector        : Input vector
 //      - vector_length : Number of samples in vector
 //
 // Return value         : Index to maximum absolute value in vector
 //
 //
 // WebRtcSpl_MinIndexW16(...)
 // WebRtcSpl_MinIndexW32(...)
 //
 // Returns the vector index to the minimum sample value of a vector
 //
 // Input:
 //      - vector        : Input vector
 //      - vector_length : Number of samples in vector
 //
 // Return value         : Index to minimum sample value in vector
 //
 //
 // WebRtcSpl_MaxIndexW16(...)
 // WebRtcSpl_MaxIndexW32(...)
 //
 // Returns the vector index to the maximum sample value of a vector
 //
 // Input:
 //      - vector        : Input vector
 //      - vector_length : Number of samples in vector
 //
 // Return value         : Index to maximum sample value in vector
 //
 //
 // WebRtcSpl_VectorBitShiftW16(...)
 // WebRtcSpl_VectorBitShiftW32(...)
--- a/src/common_audio/signal_processing/min_max_operations.c
+++ b/src/common_audio/signal_processing/min_max_operations.c
@ -11,32 +11,35 @@
 /*
 * This file contains the implementation of functions
 * WebRtcSpl_MaxAbsValueW16()
 * WebRtcSpl_MaxAbsIndexW16()
 * WebRtcSpl_MaxAbsValueW32()
 * WebRtcSpl_MaxValueW16()
 * WebRtcSpl_MaxIndexW16()
 * WebRtcSpl_MaxValueW32()
 * WebRtcSpl_MaxIndexW32()
 * WebRtcSpl_MinValueW16()
 * WebRtcSpl_MinIndexW16()
 * WebRtcSpl_MinValueW32()
 * WebRtcSpl_MaxAbsIndexW16()
 * WebRtcSpl_MaxIndexW16()
 * WebRtcSpl_MaxIndexW32()
 * WebRtcSpl_MinIndexW16()
 * WebRtcSpl_MinIndexW32()
 *
 * The description header can be found in signal_processing_library.h.
 *
 */
 #include "signal_processing_library.h"
 #include <stdlib.h>
 // TODO(bjorn/kma): Consolidate function pairs (e.g. combine
 // WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
 #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
 // Maximum absolute value of word16 vector.
 int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
-  int i = 0;
+  int i = 0, absolute = 0, maximum = 0;
-  int absolute = 0;
+
-  int maximum = -1;  // Return -1 if length <= 0.
+  if (vector == NULL || length <= 0) {
    return -1;
  }
  for (i = 0; i < length; i++) {
    absolute = abs((int)vector[i]);
@ -54,214 +57,201 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
  return (int16_t)maximum;
 }
 #endif
 // Index of maximum absolute value in a word16 vector.
 //
 // Returns the index of the first element whose absolute value is the
 // largest in |vector|, or -1 if (vector == NULL || length <= 0).
 WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
 {
     // Magnitudes are kept in type int so that abs(-32768) == 32768 is
     // representable; a 16-bit negation would overflow for that input.
     int tempMax = 0;
     int absTemp = 0;
     WebRtc_Word16 tempMaxIndex = 0;
     WebRtc_Word16 i = 0;
     // Reject invalid input before touching the vector.
     if (vector == NULL || length <= 0)
     {
         return -1;
     }
     for (i = 0; i < length; i++)
     {
         absTemp = (vector[i] < 0) ? -(int)vector[i] : (int)vector[i];
         // Strict comparison keeps the lowest index among equal magnitudes.
         if (absTemp > tempMax)
         {
             tempMax = absTemp;
             tempMaxIndex = i;
         }
     }
     return tempMaxIndex;
 }
 // Maximum absolute value of word32 vector.
-WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
+int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
-{
+  // Use uint for the local variables, to accommodate the value
-    WebRtc_UWord32 tempMax = 0;
+  // of abs(0x80000000).
    WebRtc_UWord32 absVal;
    WebRtc_Word32 retval;
    int i;
    G_CONST WebRtc_Word32 *tmpvector = vector;
-    for (i = 0; i < length; i++)
+  uint absolute = 0, maximum = 0;
-    {
+  int i = 0;
-        absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
+
-        if (absVal > tempMax)
+  if (vector == NULL || length <= 0) {
-        {
+    return -1;
-            tempMax = absVal;
+  }
-        }
+
-        tmpvector++;
+  for (i = 0; i < length; i++) {
    absolute = abs((int)vector[i]);
    if (absolute > maximum) {
      maximum = absolute;
    }
-    retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX));
+  }
-    return retval;
+
  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
  return (int32_t)maximum;
 }
 // Maximum value of word16 vector.
 #ifndef XSCALE_OPT
-WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
+int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
-{
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
-    WebRtc_Word16 tempMax;
+  int i = 0;
    WebRtc_Word16 i;
    G_CONST WebRtc_Word16 *tmpvector = vector;
-    tempMax = *tmpvector++;
+  if (vector == NULL || length <= 0) {
-    for (i = 1; i < length; i++)
+    return maximum;
-    {
+  }
        if (*tmpvector++ > tempMax)
            tempMax = vector[i];
    }
    return tempMax;
 }
 #else
 #pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
 #endif
-// Index of maximum value in a word16 vector.
+  for (i = 0; i < length; i++) {
-WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
+    if (vector[i] > maximum)
-{
+      maximum = vector[i];
-    WebRtc_Word16 tempMax;
+  }
-    WebRtc_Word16 tempMaxIndex = 0;
+  return maximum;
    WebRtc_Word16 i = 0;
    G_CONST WebRtc_Word16 *tmpvector = vector;
    tempMax = *tmpvector++;
    for (i = 1; i < length; i++)
    {
        if (*tmpvector++ > tempMax)
        {
            tempMax = vector[i];
            tempMaxIndex = i;
        }
    }
    return tempMaxIndex;
 }
 // Maximum value of word32 vector.
-#ifndef XSCALE_OPT
+int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
-WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
-{
+  int i = 0;
    WebRtc_Word32 tempMax;
    WebRtc_Word16 i;
    G_CONST WebRtc_Word32 *tmpvector = vector;
-    tempMax = *tmpvector++;
+  if (vector == NULL || length <= 0) {
-    for (i = 1; i < length; i++)
+    return maximum;
-    {
+  }
-        if (*tmpvector++ > tempMax)
+
-            tempMax = vector[i];
+  for (i = 0; i < length; i++) {
-    }
+    if (vector[i] > maximum)
-    return tempMax;
+      maximum = vector[i];
  }
  return maximum;
 }
 #else
 #pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
 #pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
 #endif
-// Index of maximum value in a word32 vector.
+// Minimum value of word16 vector.
-WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
+int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
-{
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
-    WebRtc_Word32 tempMax;
+  int i = 0;
    WebRtc_Word16 tempMaxIndex = 0;
    WebRtc_Word16 i = 0;
    G_CONST WebRtc_Word32 *tmpvector = vector;
-    tempMax = *tmpvector++;
+  if (vector == NULL || length <= 0) {
-    for (i = 1; i < length; i++)
+    return minimum;
-    {
+  }
-        if (*tmpvector++ > tempMax)
+
-        {
+  for (i = 0; i < length; i++) {
-            tempMax = vector[i];
+    if (vector[i] < minimum)
-            tempMaxIndex = i;
+      minimum = vector[i];
-        }
+  }
-    }
+  return minimum;
    return tempMaxIndex;
 }
-// Minimum value of word16 vector.
+// Minimum value of word32 vector.
-WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
+int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
-{
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
-    WebRtc_Word16 tempMin;
+  int i = 0;
    WebRtc_Word16 i;
    G_CONST WebRtc_Word16 *tmpvector = vector;
-    // Find the minimum value
+  if (vector == NULL || length <= 0) {
-    tempMin = *tmpvector++;
+    return minimum;
-    for (i = 1; i < length; i++)
+  }
-    {
+
-        if (*tmpvector++ < tempMin)
+  for (i = 0; i < length; i++) {
-            tempMin = (vector[i]);
+    if (vector[i] < minimum)
      minimum = vector[i];
  }
  return minimum;
 }
 #endif
 // Index of maximum absolute value in a word16 vector.
 int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
  // Use type int for local variables, to accommodate the value of abs(-32768).
  int i = 0, absolute = 0, maximum = 0, index = 0;
  if (vector == NULL || length <= 0) {
    return -1;
  }
  for (i = 0; i < length; i++) {
    absolute = abs((int)vector[i]);
    if (absolute > maximum) {
      maximum = absolute;
      index = i;
    }
-    return tempMin;
+  }
  return index;
 }
 // Locates the largest sample of a 16-bit vector and reports its position.
 // When several samples share the largest value the lowest index wins.
 // Returns -1 if (vector == NULL || length <= 0).
 int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) {
   int16_t best = WEBRTC_SPL_WORD16_MIN;
   int best_pos = 0;
   int pos;
   if (length <= 0 || vector == NULL) {
     return -1;
   }
   for (pos = 0; pos < length; ++pos) {
     const int16_t sample = vector[pos];
     // Strict comparison: an equal sample never displaces an earlier one.
     if (sample > best) {
       best_pos = pos;
       best = sample;
     }
   }
   return best_pos;
 }
 // Locates the largest sample of a 32-bit vector and reports its position.
 // When several samples share the largest value the lowest index wins.
 // Returns -1 if (vector == NULL || length <= 0).
 int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) {
   int32_t best = WEBRTC_SPL_WORD32_MIN;
   int best_pos = 0;
   int pos;
   if (length <= 0 || vector == NULL) {
     return -1;
   }
   for (pos = 0; pos < length; ++pos) {
     const int32_t sample = vector[pos];
     // Strict comparison: an equal sample never displaces an earlier one.
     if (sample > best) {
       best_pos = pos;
       best = sample;
     }
   }
   return best_pos;
 }
 // Index of minimum value in a word16 vector.
 #ifndef XSCALE_OPT
-WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
+int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) {
-{
+  int i = 0, index = 0;
-    WebRtc_Word16 tempMin;
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
    WebRtc_Word16 tempMinIndex = 0;
    WebRtc_Word16 i = 0;
    G_CONST WebRtc_Word16* tmpvector = vector;
-    // Find index of smallest value
+  if (vector == NULL || length <= 0) {
-    tempMin = *tmpvector++;
+    return -1;
-    for (i = 1; i < length; i++)
+  }
-    {
+
-        if (*tmpvector++ < tempMin)
+  for (i = 0; i < length; i++) {
-        {
+    if (vector[i] < minimum) {
-            tempMin = vector[i];
+      minimum = vector[i];
-            tempMinIndex = i;
+      index = i;
        }
    }
-    return tempMinIndex;
+  }
 }
 #else
 #pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
 #endif
-// Minimum value of word32 vector.
+  return index;
 // Minimum value of word32 vector.
 //
 // Returns the smallest sample in |vector|. If (vector == NULL || length <= 0)
 // WEBRTC_SPL_WORD32_MAX is returned; note that this is also a feasible
 // sample value, so errors cannot be detected from the result alone.
 WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
 {
     WebRtc_Word32 tempMin = WEBRTC_SPL_WORD32_MAX;
     WebRtc_Word16 i;
     // Reject invalid input before touching the vector.
     if (vector == NULL || length <= 0)
     {
         return tempMin;
     }
     // Find the minimum value
     for (i = 0; i < length; i++)
     {
         if (vector[i] < tempMin)
             tempMin = vector[i];
     }
     return tempMin;
 }
 // Index of minimum value in a word32 vector.
-#ifndef XSCALE_OPT
+int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) {
-WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
+  int i = 0, index = 0;
-{
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
    WebRtc_Word32 tempMin;
    WebRtc_Word16 tempMinIndex = 0;
    WebRtc_Word16 i = 0;
    G_CONST WebRtc_Word32 *tmpvector = vector;
-    // Find index of smallest value
+  if (vector == NULL || length <= 0) {
-    tempMin = *tmpvector++;
+    return -1;
-    for (i = 1; i < length; i++)
+  }
-    {
+
-        if (*tmpvector++ < tempMin)
+  for (i = 0; i < length; i++) {
-        {
+    if (vector[i] < minimum) {
-            tempMin = vector[i];
+      minimum = vector[i];
-            tempMinIndex = i;
+      index = i;
        }
    }
-    return tempMinIndex;
+  }
  return index;
 }
 #else
 #pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
 #pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
 #endif
--- a/src/common_audio/signal_processing/min_max_operations_neon.s
+++ b/src/common_audio/signal_processing/min_max_operations_neon.s
@ -18,50 +18,288 @@
 .arch armv7-a
 .fpu neon
 .global WebRtcSpl_MaxAbsValueW16
 .global WebRtcSpl_MaxAbsValueW32
 .global WebRtcSpl_MaxValueW16
 .global WebRtcSpl_MaxValueW32
 .global WebRtcSpl_MinValueW16
 .global WebRtcSpl_MinValueW32
 .align  2
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
 WebRtcSpl_MaxAbsValueW16:
 .fnstart
  mov r2, #-1                 @ Initialize the return value.
  cmp r0, #0
  beq END_MAX_ABS_VALUE_W16
  cmp r1, #0
  ble END_MAX_ABS_VALUE_W16
  cmp r1, #8
  blt LOOP_MAX_ABS_VALUE_W16
  vmov.i16 q12, #0
-  mov r2, #-1                 @ Return value for the maximum.
+  sub r1, #8                  @ Counter for loops
  cmp r1, #0                  @ length
  ble END                     @ Return -1 if length <= 0.
  cmp r1, #7
  ble LOOP_NO_UNROLLING
-  lsr r3, r1, #3
+LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
-  lsl r3, #3                  @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.
+  vld1.16 {q13}, [r0]!
-  sub r1, r3                  @ Counter for LOOP_NO_UNROLLING: length % 8.
+  subs r1, #8
 LOOP_UNROLLED_BY_8:
  vld1.16 {d26, d27}, [r0]!
  subs r3, #8
  vabs.s16 q13, q13           @ Note vabs doesn't change the value of -32768.
  vmax.u16 q12, q13           @ Use u16 so we don't lose the value -32768.
-  bne LOOP_UNROLLED_BY_8
+  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16
  @ Find the maximum value in the Neon registers and move it to r2.
  vmax.u16 d24, d25
  vpmax.u16 d24, d24
  vpmax.u16 d24, d24
-  cmp r1, #0
+  adds r1, #8
  vmov.u16 r2, d24[0]
-  ble END
+  beq END_MAX_ABS_VALUE_W16
-LOOP_NO_UNROLLING:
+LOOP_MAX_ABS_VALUE_W16:
  ldrsh r3, [r0], #2
  eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
  sub r12, r12, r3, asr #31
  cmp r2, r12
  movlt r2, r12
  subs r1, #1
-  bne LOOP_NO_UNROLLING
+  bne LOOP_MAX_ABS_VALUE_W16
-END:
+END_MAX_ABS_VALUE_W16:
  cmp r2, #0x8000             @ Guard against the case for -32768.
  subeq r2, #1
  mov r0, r2
  bx  lr
 .fnend
@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
@
@ Returns the largest absolute value in |vector|, saturated to 0x7FFFFFFF,
@ or -1 if (vector == NULL || length <= 0).
@
@ Register usage:
@   r0      : read pointer into |vector| (post-incremented); return value.
@   r1      : number of samples still to process.
@   r2      : running maximum, treated as unsigned.
@   r3, r12 : scratch for the scalar tail loop.
@   q11,q12 : Neon running maxima; q13,q14 : freshly loaded samples.
 WebRtcSpl_MaxAbsValueW32:
 .fnstart
  cmp r0, #0
  moveq r0, #-1
  beq EXIT                    @ Return -1 for a NULL pointer.
  cmp r1, #0                  @ length
  movle r0, #-1
  ble EXIT                    @ Return -1 if length <= 0.
  @ Seed the scalar maximum. When length < 8 the Neon reduction below is
  @ skipped, so without this r2 would be read uninitialized in the tail loop.
  mov r2, #0
  vmov.i32 q11, #0
  vmov.i32 q12, #0
  cmp r1, #8
  blt LOOP_MAX_ABS_VALUE_W32
  sub r1, #8                  @ Bias the counter so "subs/bge" ends the loop.
 LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
  vld1.32 {q13, q14}, [r0]!
  subs r1, #8                 @ Flags are consumed by the bge below.
  vabs.s32 q13, q13           @ vabs doesn't change the value of 0x80000000.
  vabs.s32 q14, q14
  vmax.u32 q11, q13           @ Use u32 so we don't lose the value 0x80000000.
  vmax.u32 q12, q14
  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
  @ Find the maximum value in the Neon registers and move it to r2.
  vmax.u32 q12, q11
  vmax.u32 d24, d25
  vpmax.u32 d24, d24
  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
  vmov.u32 r2, d24[0]         @ Does not alter the flags set by adds.
  beq END_MAX_ABS_VALUE_W32
 LOOP_MAX_ABS_VALUE_W32:
  ldr r3, [r0], #4
  eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
  sub r12, r12, r3, asr #31
  cmp r2, r12
  movcc r2, r12               @ Unsigned compare keeps 0x80000000 as largest.
  subs r1, #1
  bne LOOP_MAX_ABS_VALUE_W32
 END_MAX_ABS_VALUE_W32:
  mvn r0, #0x80000000         @ Guard against the case for 0x80000000.
  cmp r2, r0
  movcc r0, r2                @ r0 = min(r2, 0x7FFFFFFF), unsigned.
 EXIT:
  bx  lr
 .fnend
@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
@
@ Returns the largest sample in |vector|, or -32768 (WEBRTC_SPL_WORD16_MIN)
@ if (vector == NULL || length <= 0).
@
@ Register usage:
@   r0  : read pointer into |vector| (post-incremented); return value.
@   r1  : number of samples still to process.
@   r2  : running maximum, kept sign-extended to 32 bits at all times.
@   r3  : scalar sample in the tail loop.
@   q12 : Neon running maxima; q13 : freshly loaded samples.
 WebRtcSpl_MaxValueW16:
 .fnstart
  @ Initialize the return value to -32768. 0xFFFF8000 is not encodable as a
  @ single immediate, so load 0x8000 and sign-extend it. A plain 0x8000
  @ (+32768) would exceed every int16 sample in the signed compares below.
  mov r2, #0x8000
  sxth r2, r2
  cmp r0, #0
  beq END_MAX_VALUE_W16
  cmp r1, #0
  ble END_MAX_VALUE_W16
  vmov.i16 q12, #0x8000
  cmp r1, #8
  blt LOOP_MAX_VALUE_W16
  sub r1, #8                  @ Bias the counter so "subs/bge" ends the loop.
 LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
  vld1.16 {q13}, [r0]!
  subs r1, #8                 @ Flags are consumed by the bge below.
  vmax.s16 q12, q13
  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16
  @ Find the maximum value in the Neon registers and move it to r2.
  vmax.s16 d24, d25
  vpmax.s16 d24, d24
  vpmax.s16 d24, d24
  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
  vmov.s16 r2, d24[0]         @ Sign-extend: r2 feeds signed compares and the
                              @ int16_t return value.
  beq END_MAX_VALUE_W16
 LOOP_MAX_VALUE_W16:
  ldrsh r3, [r0], #2
  cmp r2, r3
  movlt r2, r3
  subs r1, #1
  bne LOOP_MAX_VALUE_W16
 END_MAX_VALUE_W16:
  mov r0, r2
  bx  lr
 .fnend
@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
@
@ Returns the largest sample in |vector|, or 0x80000000 if
@ (vector == NULL || length <= 0).
@
@ Register usage:
@   r0      : read pointer into |vector| (post-incremented); return value.
@   r1      : number of samples still to process.
@   r2      : running signed maximum.
@   r3      : scalar sample in the tail loop.
@   q11,q12 : Neon running maxima; q13,q14 : freshly loaded samples.
 WebRtcSpl_MaxValueW32:
 .fnstart
  mov r2, #0x80000000         @ Initialize the return value.
  cmp r0, #0
  beq END_MAX_VALUE_W32       @ NULL pointer: return the initial value.
  cmp r1, #0
  ble END_MAX_VALUE_W32       @ length <= 0: return the initial value.
  vmov.i32 q11, #0x80000000
  vmov.i32 q12, #0x80000000
  cmp r1, #8
  blt LOOP_MAX_VALUE_W32      @ Fewer than 8 samples: scalar loop only.
  sub r1, #8                  @ Counter for loops
 LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
  vld1.32 {q13, q14}, [r0]!   @ Load 8 samples and advance the pointer.
  subs r1, #8                 @ Flags are consumed by the bge below.
  vmax.s32 q11, q13
  vmax.s32 q12, q14
  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
  @ Find the maximum value in the Neon registers and move it to r2.
  vmax.s32 q12, q11
  vpmax.s32 d24, d25
  vpmax.s32 d24, d24
  adds r1, #8                 @ Undo the counter bias: r1 = leftover samples.
  vmov.s32 r2, d24[0]
  beq END_MAX_VALUE_W32       @ No leftover samples (Z flag from adds).
 LOOP_MAX_VALUE_W32:
  ldr r3, [r0], #4
  cmp r2, r3
  movlt r2, r3                @ Signed compare: keep the larger value.
  subs r1, #1
  bne LOOP_MAX_VALUE_W32
 END_MAX_VALUE_W32:
  mov r0, r2
  bx  lr
 .fnend
@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
@
@ Returns the smallest sample in |vector|, or 0x7FFF if
@ (vector == NULL || length <= 0).
@
@ Register usage:
@   r0  : read pointer into |vector| (post-incremented); return value.
@   r1  : number of samples still to process.
@   r2  : running signed minimum.
@   r3  : scalar sample in the tail loop.
@   q12 : Neon running minima; q13 : freshly loaded samples.
 WebRtcSpl_MinValueW16:
 .fnstart
  movw r2, #0x7FFF            @ Initialize the return value.
  cmp r0, #0
  beq END_MIN_VALUE_W16       @ NULL pointer: return the initial value.
  cmp r1, #0
  ble END_MIN_VALUE_W16       @ length <= 0: return the initial value.
  vmov.i16 q12, #0x7FFF
  cmp r1, #8
  blt LOOP_MIN_VALUE_W16      @ Fewer than 8 samples: scalar loop only.
  sub r1, #8                  @ Counter for loops
 LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
  vld1.16 {q13}, [r0]!        @ Load 8 samples and advance the pointer.
  subs r1, #8                 @ Flags are consumed by the bge below.
  vmin.s16 q12, q13
  bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
  @ Find the minimum value in the Neon registers and move it to r2.
  vmin.s16 d24, d25
  vpmin.s16 d24, d24
  vpmin.s16 d24, d24
  adds r1, #8                 @ Undo the counter bias: r1 = leftover samples.
  vmov.s16 r2, d24[0]         @ Sign-extending move into r2.
  sxth  r2, r2                @ Leaves the flags from adds untouched.
  beq END_MIN_VALUE_W16       @ No leftover samples (Z flag from adds).
 LOOP_MIN_VALUE_W16:
  ldrsh r3, [r0], #2
  cmp r2, r3
  movge r2, r3                @ Signed compare: keep the smaller value.
  subs r1, #1
  bne LOOP_MIN_VALUE_W16
 END_MIN_VALUE_W16:
  mov r0, r2
  bx  lr
 .fnend
@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
@
@ Returns the smallest sample in |vector|, or 0x7FFFFFFF if
@ (vector == NULL || length <= 0).
@
@ Register usage:
@   r0      : read pointer into |vector| (post-incremented); return value.
@   r1      : number of samples still to process.
@   r2      : running signed minimum.
@   r3      : scalar sample in the tail loop.
@   q11,q12 : Neon running minima; q13,q14 : freshly loaded samples.
 WebRtcSpl_MinValueW32:
 .fnstart
  mov r2, #0x7FFFFFFF         @ Initialize the return value.
  cmp r0, #0
  beq END_MIN_VALUE_W32       @ NULL pointer: return the initial value.
  cmp r1, #0
  ble END_MIN_VALUE_W32       @ length <= 0: return the initial value.
  vdup.32 q11, r2             @ Seed both accumulators with 0x7FFFFFFF.
  vdup.32 q12, r2
  cmp r1, #8
  blt LOOP_MIN_VALUE_W32      @ Fewer than 8 samples: scalar loop only.
  sub r1, #8                  @ Counter for loops
 LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
  vld1.32 {q13, q14}, [r0]!   @ Load 8 samples and advance the pointer.
  subs r1, #8                 @ Flags are consumed by the bge below.
  vmin.s32 q11, q13
  vmin.s32 q12, q14
  bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
  @ Find the minimum value in the Neon registers and move it to r2.
  vmin.s32 q12, q11
  vpmin.s32 d24, d25
  vpmin.s32 d24, d24
  adds r1, #8                 @ Undo the counter bias: r1 = leftover samples.
  vmov.s32 r2, d24[0]
  beq END_MIN_VALUE_W32       @ No leftover samples (Z flag from adds).
 LOOP_MIN_VALUE_W32:
  ldr r3, [r0], #4
  cmp r2, r3
  movge r2, r3                @ Signed compare: keep the smaller value.
  subs r1, #1
  bne LOOP_MIN_VALUE_W32
 END_MIN_VALUE_W32:
  mov r0, r2
  bx  lr
 .fnend