Optimizations on several SPL min max operations in ARM, and refactoring in C.

Touched C and assembly functions are tested with a new unit test which is not in the code base yet. Review URL: https://webrtc-codereview.appspot.com/428004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1974 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-04-02 03:55:20 +00:00 · 2012-04-02 03:55:20 +00:00 · 95c3d408f5
commit 95c3d408f5
parent f3bbc3e5b3
3 changed files with 544 additions and 300 deletions
--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@ -34,6 +34,8 @@
 #define WEBRTC_SPL_MAX_SEED_USED    0x80000000L
 #define WEBRTC_SPL_MIN(A, B)        (A < B ? A : B) // Get min value
 #define WEBRTC_SPL_MAX(A, B)        (A > B ? A : B) // Get max value
+// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
+// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
 #define WEBRTC_SPL_ABS_W16(a) \
    (((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
 #define WEBRTC_SPL_ABS_W32(a) \
@ -202,41 +204,130 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
                                     WebRtc_Word16 vector_length);
 // End: Copy and set operations.

+
 // Minimum and maximum operations. Implementation in min_max_operations.c.

 // Returns the largest absolute value in a signed 16-bit vector.
 //
 // Input:
-//      - vector :   Input vector.
+//      - vector : 16-bit input vector.
 //      - length : Number of samples in vector.
 //
-// Return value  :   Maximum absolute value in vector.
-
+// Return value  : Maximum absolute value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
 int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);

-WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
-                                       WebRtc_Word16 length);
-WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
-                                    WebRtc_Word16 length);
-WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector,
-                                    WebRtc_Word16 length);
-WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector,
-                                    WebRtc_Word16 length);
+// Returns the largest absolute value in a signed 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum absolute value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
+int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
+
+// Returns the maximum value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum sample value in |vector|.
+//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
+//                 is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
+//                 value and we can't catch errors purely based on it.
+int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
+
+// Returns the maximum value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum sample value in |vector|.
+//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
+//                 is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
+//                 value and we can't catch errors purely based on it.
+int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
+
+// Returns the minimum value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Minimum sample value in |vector|.
+//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
+//                 is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
+//                 value and we can't catch errors purely based on it.
+int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
+
+// Returns the minimum value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Minimum sample value in |vector|.
+//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
+//                 is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
+//                 value and we can't catch errors purely based on it.
+int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
+
+// Returns the vector index to the largest absolute value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the maximum absolute value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
+int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length);
+
+// Returns the vector index to the maximum sample value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the maximum value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
+int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length);
+
+// Returns the vector index to the maximum sample value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the maximum value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
+int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length);
+
+// Returns the vector index to the minimum sample value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the minimum value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
+int WebRtcSpl_MinIndexW16(const int16_t* vector, int length);
+
+// Returns the vector index to the minimum sample value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the minimum value in vector;
+//                 or -1, if (vector == NULL || length <= 0).
+int WebRtcSpl_MinIndexW32(const int32_t* vector, int length);

-WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector,
-                                       WebRtc_Word16 length);
-WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector,
-                                    WebRtc_Word16 length);
-WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector,
-                                    WebRtc_Word16 length);
-WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector,
-                                    WebRtc_Word16 length);
-WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector,
-                                    WebRtc_Word16 length);
-WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector,
-                                    WebRtc_Word16 length);
 // End: Minimum and maximum operations.

+
 // Vector scaling operations. Implementation in vector_scaling_operations.c.
 // Description at bottom of file.
 void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
@ -849,81 +940,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 // Return value         : Number of samples in vector
 //

-//
-// WebRtcSpl_MinValueW16(...)
-// WebRtcSpl_MinValueW32(...)
-//
-// Returns the minimum value of a vector
-//
-// Input:
-//      - vector        : Input vector
-//      - vector_length : Number of samples in vector
-//
-// Return value         : Minimum sample value in vector
-//
-
-//
-// WebRtcSpl_MaxValueW16(...)
-// WebRtcSpl_MaxValueW32(...)
-//
-// Returns the maximum value of a vector
-//
-// Input:
-//      - vector        : Input vector
-//      - vector_length : Number of samples in vector
-//
-// Return value         : Maximum sample value in vector
-//
-
-// WebRtcSpl_MaxAbsValueW32(...)
-//
-// Returns the largest absolute value of a vector
-//
-// Input:
-//      - vector        : Input vector
-//      - vector_length : Number of samples in vector
-//
-// Return value         : Maximum absolute value in vector
-//
-
-//
-// WebRtcSpl_MaxAbsIndexW16(...)
-//
-// Returns the vector index to the largest absolute value of a vector
-//
-// Input:
-//      - vector        : Input vector
-//      - vector_length : Number of samples in vector
-//
-// Return value         : Index to maximum absolute value in vector
-//
-
-//
-// WebRtcSpl_MinIndexW16(...)
-// WebRtcSpl_MinIndexW32(...)
-//
-// Returns the vector index to the minimum sample value of a vector
-//
-// Input:
-//      - vector        : Input vector
-//      - vector_length : Number of samples in vector
-//
-// Return value         : Index to minimum sample value in vector
-//
-
-//
-// WebRtcSpl_MaxIndexW16(...)
-// WebRtcSpl_MaxIndexW32(...)
-//
-// Returns the vector index to the maximum sample value of a vector
-//
-// Input:
-//      - vector        : Input vector
-//      - vector_length : Number of samples in vector
-//
-// Return value         : Index to maximum sample value in vector
-//
-
 //
 // WebRtcSpl_VectorBitShiftW16(...)
 // WebRtcSpl_VectorBitShiftW32(...)
--- a/src/common_audio/signal_processing/min_max_operations.c
+++ b/src/common_audio/signal_processing/min_max_operations.c
@ -11,32 +11,35 @@
 /*
 * This file contains the implementation of functions
 * WebRtcSpl_MaxAbsValueW16()
- * WebRtcSpl_MaxAbsIndexW16()
 * WebRtcSpl_MaxAbsValueW32()
 * WebRtcSpl_MaxValueW16()
- * WebRtcSpl_MaxIndexW16()
 * WebRtcSpl_MaxValueW32()
- * WebRtcSpl_MaxIndexW32()
 * WebRtcSpl_MinValueW16()
- * WebRtcSpl_MinIndexW16()
 * WebRtcSpl_MinValueW32()
+ * WebRtcSpl_MaxAbsIndexW16()
+ * WebRtcSpl_MaxIndexW16()
+ * WebRtcSpl_MaxIndexW32()
+ * WebRtcSpl_MinIndexW16()
 * WebRtcSpl_MinIndexW32()
 *
- * The description header can be found in signal_processing_library.h.
- *
 */

 #include "signal_processing_library.h"

 #include <stdlib.h>

+// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
+// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+
 #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))

 // Maximum absolute value of word16 vector.
 int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
-  int i = 0;
-  int absolute = 0;
-  int maximum = -1;  // Return -1 if length <= 0.
+  int i = 0, absolute = 0, maximum = 0;
+
+  if (vector == NULL || length <= 0) {
+    return -1;
+  }

  for (i = 0; i < length; i++) {
    absolute = abs((int)vector[i]);
@ -54,214 +57,201 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
  return (int16_t)maximum;
 }

-#endif
-
-// Index of maximum absolute value in a  word16 vector.
-WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
-{
-    WebRtc_Word16 tempMax;
-    WebRtc_Word16 absTemp;
-    WebRtc_Word16 tempMaxIndex = 0;
-    WebRtc_Word16 i = 0;
-    G_CONST WebRtc_Word16 *tmpvector = vector;
-
-    tempMax = WEBRTC_SPL_ABS_W16(*tmpvector);
-    tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        absTemp = WEBRTC_SPL_ABS_W16(*tmpvector);
-        tmpvector++;
-        if (absTemp > tempMax)
-        {
-            tempMax = absTemp;
-            tempMaxIndex = i;
-        }
-    }
-    return tempMaxIndex;
-}
-
 // Maximum absolute value of word32 vector.
-WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
-{
-    WebRtc_UWord32 tempMax = 0;
-    WebRtc_UWord32 absVal;
-    WebRtc_Word32 retval;
-    int i;
-    G_CONST WebRtc_Word32 *tmpvector = vector;
+// Returns the largest absolute value in |vector|, saturated to
+// WEBRTC_SPL_WORD32_MAX, or -1 if (vector == NULL || length <= 0).
+int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
+  // Use uint32_t for the local variables, to accommodate the magnitude
+  // of 0x80000000 (2^31), which does not fit in int32_t.

-    for (i = 0; i < length; i++)
-    {
-        absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
-        if (absVal > tempMax)
-        {
-            tempMax = absVal;
+  uint32_t absolute = 0, maximum = 0;
+  int i = 0;
+
+  if (vector == NULL || length <= 0) {
+    return -1;
   }
-        tmpvector++;
+
+  for (i = 0; i < length; i++) {
+    // Negate in unsigned arithmetic: abs() is undefined for the most negative
+    // int, whereas (0 - x) on uint32_t wraps to the correct magnitude.
+    absolute = (vector[i] < 0) ? ((uint32_t)0 - (uint32_t)vector[i])
+                               : (uint32_t)vector[i];
+    if (absolute > maximum) {
+      maximum = absolute;
     }
-    retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX));
-    return retval;
+  }
+
+  // Saturate so the result is representable in the signed return type.
+  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+  return (int32_t)maximum;
 }

 // Maximum value of word16 vector; WEBRTC_SPL_WORD16_MIN if vector is NULL or length <= 0.
 #ifndef XSCALE_OPT
-WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
-{
-    WebRtc_Word16 tempMax;
-    WebRtc_Word16 i;
-    G_CONST WebRtc_Word16 *tmpvector = vector;
+int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;  // Seed; also the result for invalid input.
+  int i = 0;

-    tempMax = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ > tempMax)
-            tempMax = vector[i];
+  if (vector == NULL || length <= 0) {
+    return maximum;  // Indistinguishable from a genuine WEBRTC_SPL_WORD16_MIN maximum.
   }
-    return tempMax;
-}
-#else
-#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
-#endif

-// Index of maximum value in a word16 vector.
-WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
-{
-    WebRtc_Word16 tempMax;
-    WebRtc_Word16 tempMaxIndex = 0;
-    WebRtc_Word16 i = 0;
-    G_CONST WebRtc_Word16 *tmpvector = vector;
-
-    tempMax = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ > tempMax)
-        {
-            tempMax = vector[i];
-            tempMaxIndex = i;
+  for (i = 0; i < length; i++) {  // Linear scan over all |length| samples.
+    if (vector[i] > maximum)
+      maximum = vector[i];
   }
-    }
-    return tempMaxIndex;
+  return maximum;
 }

 // Maximum value of word32 vector; WEBRTC_SPL_WORD32_MIN if vector is NULL or length <= 0.
-#ifndef XSCALE_OPT
-WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
-{
-    WebRtc_Word32 tempMax;
-    WebRtc_Word16 i;
-    G_CONST WebRtc_Word32 *tmpvector = vector;
+int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;  // Seed; also the result for invalid input.
+  int i = 0;

-    tempMax = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ > tempMax)
-            tempMax = vector[i];
+  if (vector == NULL || length <= 0) {
+    return maximum;  // Indistinguishable from a genuine WEBRTC_SPL_WORD32_MIN maximum.
   }
-    return tempMax;
+  // Linear scan over all |length| samples.
+  for (i = 0; i < length; i++) {
+    if (vector[i] > maximum)
+      maximum = vector[i];
+  }
+  return maximum;
 }
 #else
+#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
 #pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
 #endif

-// Index of maximum value in a word32 vector.
-WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
-{
-    WebRtc_Word32 tempMax;
-    WebRtc_Word16 tempMaxIndex = 0;
-    WebRtc_Word16 i = 0;
-    G_CONST WebRtc_Word32 *tmpvector = vector;
+// Minimum value of word16 vector; WEBRTC_SPL_WORD16_MAX if vector is NULL or length <= 0.
+int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;  // Seed; also the result for invalid input.
+  int i = 0;

-    tempMax = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ > tempMax)
-        {
-            tempMax = vector[i];
-            tempMaxIndex = i;
+  if (vector == NULL || length <= 0) {
+    return minimum;  // Indistinguishable from a genuine WEBRTC_SPL_WORD16_MAX minimum.
   }
+  // Linear scan over all |length| samples.
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum)
+      minimum = vector[i];
   }
-    return tempMaxIndex;
+  return minimum;
 }

-// Minimum value of word16 vector.
-WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
-{
-    WebRtc_Word16 tempMin;
-    WebRtc_Word16 i;
-    G_CONST WebRtc_Word16 *tmpvector = vector;
+// Minimum value of word32 vector; WEBRTC_SPL_WORD32_MAX if vector is NULL or length <= 0.
+int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;  // Seed; also the result for invalid input.
+  int i = 0;

-    // Find the minimum value
-    tempMin = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ < tempMin)
-            tempMin = (vector[i]);
+  if (vector == NULL || length <= 0) {
+    return minimum;  // Indistinguishable from a genuine WEBRTC_SPL_WORD32_MAX minimum.
   }
-    return tempMin;
+  // Linear scan over all |length| samples.
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum)
+      minimum = vector[i];
+  }
+  return minimum;
+}
+#endif
+
+
+// Index of the sample with the largest absolute value in a word16 vector.
+// Returns -1 if |vector| is NULL or |length| <= 0. When several samples share
+// the largest magnitude, the lowest such index is returned.
+int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
+  // Magnitudes are held in int so that |-32768| = 32768 is representable.
+  int best_index = 0;
+  int best_magnitude = 0;
+  int k = 0;
+
+  if (!vector || length < 1) {
+    return -1;
+  }
+
+  while (k < length) {
+    const int sample = vector[k];
+    const int magnitude = (sample < 0) ? -sample : sample;
+
+    if (best_magnitude < magnitude) {
+      best_magnitude = magnitude;
+      best_index = k;
+    }
+    k++;
+  }
+
+  return best_index;
+}
+
+// Index of the largest sample in a word16 vector (lowest index on ties).
+// Returns -1 if |vector| is NULL or |length| <= 0.
+int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) {
+  int best_index = 0;
+  int k;
+  int16_t best_value;
+
+  if (!vector || length < 1) {
+    return -1;
+  }
+
+  // Seed with the first sample, then scan the remaining ones.
+  best_value = vector[0];
+  for (k = 1; k < length; ++k) {
+    if (best_value < vector[k]) {
+      best_value = vector[k];
+      best_index = k;
+    }
+  }
+
+  return best_index;
+}
+
+// Index of the largest sample in a word32 vector (lowest index on ties).
+// Returns -1 if |vector| is NULL or |length| <= 0.
+int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) {
+  int best_index = 0;
+  int k;
+  int32_t best_value;
+
+  if (!vector || length < 1) {
+    return -1;
+  }
+
+  // Seed with the first sample, then scan the remaining ones.
+  best_value = vector[0];
+  for (k = 1; k < length; ++k) {
+    if (best_value < vector[k]) {
+      best_value = vector[k];
+      best_index = k;
+    }
+  }
+
+  return best_index;
 }

 // Index of minimum value in a word16 vector; -1 if vector is NULL or length <= 0.
 #ifndef XSCALE_OPT
-WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
-{
-    WebRtc_Word16 tempMin;
-    WebRtc_Word16 tempMinIndex = 0;
-    WebRtc_Word16 i = 0;
-    G_CONST WebRtc_Word16* tmpvector = vector;
+int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) {
+  int i = 0, index = 0;  // index stays 0 if no sample is below the seed.
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;  // Seed for the running minimum.

-    // Find index of smallest value
-    tempMin = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ < tempMin)
-        {
-            tempMin = vector[i];
-            tempMinIndex = i;
+  if (vector == NULL || length <= 0) {
+    return -1;
+  }
+  // Strict '<' keeps the lowest index when the minimum occurs more than once.
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum) {
+      minimum = vector[i];
+      index = i;
     }
   }
-    return tempMinIndex;
-}
-#else
-#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
-#endif

-// Minimum value of word32 vector.
-WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
-{
-    WebRtc_Word32 tempMin;
-    WebRtc_Word16 i;
-    G_CONST WebRtc_Word32 *tmpvector = vector;
-
-    // Find the minimum value
-    tempMin = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ < tempMin)
-            tempMin = (vector[i]);
-    }
-    return tempMin;
+  return index;
 }

 // Index of minimum value in a word32 vector; -1 if vector is NULL or length <= 0.
-#ifndef XSCALE_OPT
-WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
-{
-    WebRtc_Word32 tempMin;
-    WebRtc_Word16 tempMinIndex = 0;
-    WebRtc_Word16 i = 0;
-    G_CONST WebRtc_Word32 *tmpvector = vector;
+int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) {
+  int i = 0, index = 0;  // index stays 0 if no sample is below the seed.
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;  // Seed for the running minimum.

-    // Find index of smallest value
-    tempMin = *tmpvector++;
-    for (i = 1; i < length; i++)
-    {
-        if (*tmpvector++ < tempMin)
-        {
-            tempMin = vector[i];
-            tempMinIndex = i;
+  if (vector == NULL || length <= 0) {
+    return -1;
+  }
+  // Strict '<' keeps the lowest index when the minimum occurs more than once.
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum) {
+      minimum = vector[i];
+      index = i;
     }
   }
-    return tempMinIndex;
+
+  return index;
 }
+
 #else
+#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
 #pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
 #endif
--- a/src/common_audio/signal_processing/min_max_operations_neon.s
+++ b/src/common_audio/signal_processing/min_max_operations_neon.s
@ -18,50 +18,288 @@
 .arch armv7-a
 .fpu neon
 .global WebRtcSpl_MaxAbsValueW16
+.global WebRtcSpl_MaxAbsValueW32
+.global WebRtcSpl_MaxValueW16
+.global WebRtcSpl_MaxValueW32
+.global WebRtcSpl_MinValueW16
+.global WebRtcSpl_MinValueW32
 .align  2

+@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
 WebRtcSpl_MaxAbsValueW16:
 .fnstart
  @ In: r0 = vector, r1 = length. Out: r0 = result. Scratch: r2, r3, r12, q12, q13.
+  mov r2, #-1                 @ Initialize the return value.
+  cmp r0, #0                  @ NULL vector: return -1.
+  beq END_MAX_ABS_VALUE_W16
+  cmp r1, #0                  @ length <= 0: return -1.
+  ble END_MAX_ABS_VALUE_W16
+  @ Fewer than 8 samples: skip the NEON loop and use only the scalar loop.
+  cmp r1, #8
+  blt LOOP_MAX_ABS_VALUE_W16
+
   vmov.i16 q12, #0
-  mov r2, #-1                 @ Return value for the maximum.
-  cmp r1, #0                  @ length
-  ble END                     @ Return -1 if length <= 0.
-  cmp r1, #7
-  ble LOOP_NO_UNROLLING
+  sub r1, #8                  @ Bias the counter so subs/bge loops while >= 8 samples remain.

-  lsr r3, r1, #3
-  lsl r3, #3                  @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.
-  sub r1, r3                  @ Counter for LOOP_NO_UNROLLING: length % 8.
-
-LOOP_UNROLLED_BY_8:
-  vld1.16 {d26, d27}, [r0]!
-  subs r3, #8
+LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
+  vld1.16 {q13}, [r0]!        @ Load 8 samples and advance the pointer.
+  subs r1, #8
   vabs.s16 q13, q13           @ Note vabs doesn't change the value of -32768.
   vmax.u16 q12, q13           @ Use u16 so we don't lose the value -32768.
-  bne LOOP_UNROLLED_BY_8
+  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16

   @ Find the maximum value in the Neon registers and move it to r2.
   vmax.u16 d24, d25
   vpmax.u16 d24, d24
   vpmax.u16 d24, d24
-  cmp r1, #0
+  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
   vmov.u16 r2, d24[0]
-  ble END
+  beq END_MAX_ABS_VALUE_W16   @ No leftover samples (flags still from adds).

-LOOP_NO_UNROLLING:
+LOOP_MAX_ABS_VALUE_W16:
   ldrsh r3, [r0], #2
   eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
   sub r12, r12, r3, asr #31
   cmp r2, r12
   movlt r2, r12
   subs r1, #1
-  bne LOOP_NO_UNROLLING
+  bne LOOP_MAX_ABS_VALUE_W16

-END:
+END_MAX_ABS_VALUE_W16:
   cmp r2, #0x8000             @ Guard against the case for -32768.
   subeq r2, #1
   mov r0, r2
   bx  lr

 .fnend
+
+@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
+@
+@ In:      r0 = vector, r1 = length.
+@ Out:     r0 = largest absolute value in vector, saturated to 0x7FFFFFFF;
+@          or -1 if (vector == NULL || length <= 0).
+@ Scratch: r2 (running maximum, compared as unsigned), r3, r12, q11-q14.
+WebRtcSpl_MaxAbsValueW32:
+.fnstart
+
+  cmp r0, #0
+  moveq r0, #-1
+  beq EXIT                    @ Return -1 for a NULL pointer.
+  cmp r1, #0                  @ length
+  movle r0, #-1
+  ble EXIT                    @ Return -1 if length <= 0.
+
+  mov r2, #0                  @ Running maximum. Must be defined here: when
+                              @ length < 8 the scalar loop reads r2 directly.
+  vmov.i32 q11, #0
+  vmov.i32 q12, #0
+  cmp r1, #8
+  blt LOOP_MAX_ABS_VALUE_W32
+
+  sub r1, #8                  @ Bias the counter so subs/bge loops while at
+                              @ least 8 samples remain.
+
+LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
+  vld1.32 {q13, q14}, [r0]!
+  subs r1, #8                 @ Counter for loops
+  vabs.s32 q13, q13           @ vabs doesn't change the value of 0x80000000.
+  vabs.s32 q14, q14
+  vmax.u32 q11, q13           @ Use u32 so we don't lose the value 0x80000000.
+  vmax.u32 q12, q14
+  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
+
+  @ Find the maximum value in the Neon registers and move it to r2.
+  vmax.u32 q12, q11
+  vmax.u32 d24, d25
+  vpmax.u32 d24, d24
+  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
+  vmov.u32 r2, d24[0]
+  beq END_MAX_ABS_VALUE_W32   @ No leftover samples (flags still from adds).
+
+LOOP_MAX_ABS_VALUE_W32:
+  ldr r3, [r0], #4
+  eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
+  sub r12, r12, r3, asr #31
+  cmp r2, r12
+  movcc r2, r12               @ Unsigned compare, so 0x80000000 counts as 2^31.
+  subs r1, #1
+  bne LOOP_MAX_ABS_VALUE_W32
+
+END_MAX_ABS_VALUE_W32:
+  mvn r0, #0x80000000         @ Guard against the case for 0x80000000.
+  cmp r2, r0
+  movcc r0, r2                @ r0 = min(r2, 0x7FFFFFFF), unsigned.
+
+EXIT:
+  bx  lr
+
+.fnend
+
+@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
+@
+@ In:      r0 = vector, r1 = length.
+@ Out:     r0 = maximum sample value, sign-extended to 32 bits;
+@          -32768 if (vector == NULL || length <= 0).
+@ Scratch: r2 (running maximum, signed 32-bit), r3, q12, q13.
+WebRtcSpl_MaxValueW16:
+.fnstart
+
+  mov r2, #0x8000
+  sxth r2, r2                 @ Initialize the return value to -32768. It must
+                              @ be sign-extended: the scalar loop compares it
+                              @ against ldrsh results with a signed compare.
+  cmp r0, #0
+  beq END_MAX_VALUE_W16
+  cmp r1, #0
+  ble END_MAX_VALUE_W16
+
+  vmov.i16 q12, #0x8000
+  cmp r1, #8
+  blt LOOP_MAX_VALUE_W16
+
+  sub r1, #8                  @ Bias the counter so subs/bge loops while at
+                              @ least 8 samples remain.
+
+LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
+  vld1.16 {q13}, [r0]!
+  subs r1, #8
+  vmax.s16 q12, q13
+  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16
+
+  @ Find the maximum value in the Neon registers and move it to r2.
+  vmax.s16 d24, d25
+  vpmax.s16 d24, d24
+  vpmax.s16 d24, d24
+  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
+  vmov.s16 r2, d24[0]         @ Signed move, so a negative maximum stays
+                              @ negative in the 32-bit register.
+  beq END_MAX_VALUE_W16       @ No leftover samples (flags still from adds).
+
+LOOP_MAX_VALUE_W16:
+  ldrsh r3, [r0], #2
+  cmp r2, r3
+  movlt r2, r3
+  subs r1, #1
+  bne LOOP_MAX_VALUE_W16
+
+END_MAX_VALUE_W16:
+  mov r0, r2
+  bx  lr
+
+.fnend
+
+@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
+WebRtcSpl_MaxValueW32:
+.fnstart
+  @ In: r0 = vector, r1 = length. Out: r0 = result. Scratch: r2, r3, q11-q14.
+  mov r2, #0x80000000         @ Initialize the return value.
+  cmp r0, #0                  @ NULL vector: return 0x80000000.
+  beq END_MAX_VALUE_W32
+  cmp r1, #0                  @ length <= 0: return 0x80000000.
+  ble END_MAX_VALUE_W32
+
+  vmov.i32 q11, #0x80000000   @ Two running-maximum accumulators, 4 lanes each.
+  vmov.i32 q12, #0x80000000
+  cmp r1, #8
+  blt LOOP_MAX_VALUE_W32      @ Fewer than 8 samples: scalar loop only.
+
+  sub r1, #8                  @ Bias the counter so subs/bge loops while >= 8 samples remain.
+
+LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
+  vld1.32 {q13, q14}, [r0]!   @ Load 8 samples and advance the pointer.
+  subs r1, #8
+  vmax.s32 q11, q13
+  vmax.s32 q12, q14
+  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
+
+  @ Find the maximum value in the Neon registers and move it to r2.
+  vmax.s32 q12, q11
+  vpmax.s32 d24, d25
+  vpmax.s32 d24, d24
+  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
+  vmov.s32 r2, d24[0]
+  beq END_MAX_VALUE_W32       @ No leftover samples (flags still from adds).
+
+LOOP_MAX_VALUE_W32:
+  ldr r3, [r0], #4
+  cmp r2, r3
+  movlt r2, r3                @ Signed compare: keep the larger value in r2.
+  subs r1, #1
+  bne LOOP_MAX_VALUE_W32
+
+END_MAX_VALUE_W32:
+  mov r0, r2
+  bx  lr
+
+.fnend
+
+@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
+WebRtcSpl_MinValueW16:
+.fnstart
+  @ In: r0 = vector, r1 = length. Out: r0 = result. Scratch: r2, r3, q12, q13.
+  movw r2, #0x7FFF            @ Initialize the return value.
+  cmp r0, #0                  @ NULL vector: return 0x7FFF.
+  beq END_MIN_VALUE_W16
+  cmp r1, #0                  @ length <= 0: return 0x7FFF.
+  ble END_MIN_VALUE_W16
+
+  vmov.i16 q12, #0x7FFF       @ Running minimum, 8 lanes.
+  cmp r1, #8
+  blt LOOP_MIN_VALUE_W16      @ Fewer than 8 samples: scalar loop only.
+
+  sub r1, #8                  @ Bias the counter so subs/bge loops while >= 8 samples remain.
+
+LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
+  vld1.16 {q13}, [r0]!        @ Load 8 samples and advance the pointer.
+  subs r1, #8
+  vmin.s16 q12, q13
+  bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
+
+  @ Find the minimum value in the Neon registers and move it to r2.
+  vmin.s16 d24, d25
+  vpmin.s16 d24, d24
+  vpmin.s16 d24, d24
+  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
+  vmov.s16 r2, d24[0]
+  sxth  r2, r2                @ Does not touch the flags; beq below still tests the adds result.
+  beq END_MIN_VALUE_W16
+
+LOOP_MIN_VALUE_W16:
+  ldrsh r3, [r0], #2
+  cmp r2, r3
+  movge r2, r3                @ Signed compare: keep the smaller value in r2.
+  subs r1, #1
+  bne LOOP_MIN_VALUE_W16
+
+END_MIN_VALUE_W16:
+  mov r0, r2
+  bx  lr
+
+.fnend
+
+@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
+WebRtcSpl_MinValueW32:
+.fnstart
+  @ In: r0 = vector, r1 = length. Out: r0 = result. Scratch: r2, r3, q11-q14.
+  mov r2, #0x7FFFFFFF         @ Initialize the return value.
+  cmp r0, #0                  @ NULL vector: return 0x7FFFFFFF.
+  beq END_MIN_VALUE_W32
+  cmp r1, #0                  @ length <= 0: return 0x7FFFFFFF.
+  ble END_MIN_VALUE_W32
+
+  vdup.32 q11, r2             @ Two running-minimum accumulators, seeded from r2.
+  vdup.32 q12, r2
+  cmp r1, #8
+  blt LOOP_MIN_VALUE_W32      @ Fewer than 8 samples: scalar loop only.
+
+  sub r1, #8                  @ Bias the counter so subs/bge loops while >= 8 samples remain.
+
+LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
+  vld1.32 {q13, q14}, [r0]!   @ Load 8 samples and advance the pointer.
+  subs r1, #8
+  vmin.s32 q11, q13
+  vmin.s32 q12, q14
+  bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
+
+  @ Find the minimum value in the Neon registers and move it to r2.
+  vmin.s32 q12, q11
+  vpmin.s32 d24, d25
+  vpmin.s32 d24, d24
+  adds r1, #8                 @ Undo the bias: r1 = leftover samples (0..7).
+  vmov.s32 r2, d24[0]
+  beq END_MIN_VALUE_W32       @ No leftover samples (flags still from adds).
+
+LOOP_MIN_VALUE_W32:
+  ldr r3, [r0], #4
+  cmp r2, r3
+  movge r2, r3                @ Signed compare: keep the smaller value in r2.
+  subs r1, #1
+  bne LOOP_MIN_VALUE_W32
+
+END_MIN_VALUE_W32:
+  mov r0, r2
+  bx  lr
+
+.fnend