Optimizations on several SPL min max operations in ARM, and refactoring in C.

Touched C and assembly functions are tested with a new unit test which is not in the code base yet.
Review URL: https://webrtc-codereview.appspot.com/428004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1974 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2012-04-02 03:55:20 +00:00
parent f3bbc3e5b3
commit 95c3d408f5
3 changed files with 544 additions and 300 deletions

View File

@ -34,6 +34,8 @@
#define WEBRTC_SPL_MAX_SEED_USED 0x80000000L #define WEBRTC_SPL_MAX_SEED_USED 0x80000000L
#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value #define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value
#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value #define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value
// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
#define WEBRTC_SPL_ABS_W16(a) \ #define WEBRTC_SPL_ABS_W16(a) \
(((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a)) (((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
#define WEBRTC_SPL_ABS_W32(a) \ #define WEBRTC_SPL_ABS_W32(a) \
@ -202,41 +204,130 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
WebRtc_Word16 vector_length); WebRtc_Word16 vector_length);
// End: Copy and set operations. // End: Copy and set operations.
// Minimum and maximum operations. Implementation in min_max_operations.c. // Minimum and maximum operations. Implementation in min_max_operations.c.
// Returns the largest absolute value in a signed 16-bit vector. // Returns the largest absolute value in a signed 16-bit vector.
// //
// Input: // Input:
// - vector : Input vector. // - vector : 16-bit input vector.
// - length : Number of samples in vector. // - length : Number of samples in vector.
// //
// Return value : Maximum absolute value in vector. // Return value : Maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length); int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector, // Returns the largest absolute value in a signed 32-bit vector.
WebRtc_Word16 length); //
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector, // Input:
WebRtc_Word16 length); // - vector : 32-bit input vector.
WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector, // - length : Number of samples in vector.
WebRtc_Word16 length); //
WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, // Return value : Maximum absolute value in vector;
WebRtc_Word16 length); // or -1, if (vector == NULL || length <= 0).
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
// Returns the maximum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
// is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
// value and we can't catch errors purely based on it.
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
// Returns the maximum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
// is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
// value and we can't catch errors purely based on it.
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
// Returns the minimum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
// is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
// value and we can't catch errors purely based on it.
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
// Returns the minimum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
// is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
// value and we can't catch errors purely based on it.
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
// Returns the vector index to the largest absolute value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length);
// Returns the vector index to the maximum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length);
// Returns the vector index to the maximum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length);
// Returns the vector index to the minimum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MinIndexW16(const int16_t* vector, int length);
// Returns the vector index to the minimum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MinIndexW32(const int32_t* vector, int length);
WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
// End: Minimum and maximum operations. // End: Minimum and maximum operations.
// Vector scaling operations. Implementation in vector_scaling_operations.c. // Vector scaling operations. Implementation in vector_scaling_operations.c.
// Description at bottom of file. // Description at bottom of file.
void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector, void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
@ -849,81 +940,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// Return value : Number of samples in vector // Return value : Number of samples in vector
// //
//
// WebRtcSpl_MinValueW16(...)
// WebRtcSpl_MinValueW32(...)
//
// Returns the minimum value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Minimum sample value in vector
//
//
// WebRtcSpl_MaxValueW16(...)
// WebRtcSpl_MaxValueW32(...)
//
// Returns the maximum value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Maximum sample value in vector
//
// WebRtcSpl_MaxAbsValueW32(...)
//
// Returns the largest absolute value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Maximum absolute value in vector
//
//
// WebRtcSpl_MaxAbsIndexW16(...)
//
// Returns the vector index to the largest absolute value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Index to maximum absolute value in vector
//
//
// WebRtcSpl_MinIndexW16(...)
// WebRtcSpl_MinIndexW32(...)
//
// Returns the vector index to the minimum sample value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Index to minimum sample value in vector
//
//
// WebRtcSpl_MaxIndexW16(...)
// WebRtcSpl_MaxIndexW32(...)
//
// Returns the vector index to the maximum sample value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Index to maximum sample value in vector
//
// //
// WebRtcSpl_VectorBitShiftW16(...) // WebRtcSpl_VectorBitShiftW16(...)
// WebRtcSpl_VectorBitShiftW32(...) // WebRtcSpl_VectorBitShiftW32(...)
@ -1627,7 +1643,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// WebRtc_Word16 WebRtcSpl_SatW32ToW16(...) // WebRtc_Word16 WebRtcSpl_SatW32ToW16(...)
// //
// This function saturates a 32-bit word into a 16-bit word. // This function saturates a 32-bit word into a 16-bit word.
// //
// Input: // Input:
// - value32 : The value of a 32-bit word. // - value32 : The value of a 32-bit word.
// //
@ -1639,7 +1655,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// //
// This function multiply a 16-bit word by a 16-bit word, and accumulate this // This function multiply a 16-bit word by a 16-bit word, and accumulate this
// value to a 32-bit integer. // value to a 32-bit integer.
// //
// Input: // Input:
// - a : The value of the first 16-bit word. // - a : The value of the first 16-bit word.
// - b : The value of the second 16-bit word. // - b : The value of the second 16-bit word.

View File

@ -11,32 +11,35 @@
/* /*
* This file contains the implementation of functions * This file contains the implementation of functions
* WebRtcSpl_MaxAbsValueW16() * WebRtcSpl_MaxAbsValueW16()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxAbsValueW32() * WebRtcSpl_MaxAbsValueW32()
* WebRtcSpl_MaxValueW16() * WebRtcSpl_MaxValueW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxValueW32() * WebRtcSpl_MaxValueW32()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinValueW16() * WebRtcSpl_MinValueW16()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinValueW32() * WebRtcSpl_MinValueW32()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinIndexW32() * WebRtcSpl_MinIndexW32()
* *
* The description header can be found in signal_processing_library.h.
*
*/ */
#include "signal_processing_library.h" #include "signal_processing_library.h"
#include <stdlib.h> #include <stdlib.h>
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
// Maximum absolute value of word16 vector. // Maximum absolute value of word16 vector.
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) { int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
int i = 0; int i = 0, absolute = 0, maximum = 0;
int absolute = 0;
int maximum = -1; // Return -1 if length <= 0. if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) { for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]); absolute = abs((int)vector[i]);
@ -54,214 +57,201 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
return (int16_t)maximum; return (int16_t)maximum;
} }
#endif
// Index of maximum absolute value in a word16 vector.
WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
{
WebRtc_Word16 tempMax;
WebRtc_Word16 absTemp;
WebRtc_Word16 tempMaxIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word16 *tmpvector = vector;
tempMax = WEBRTC_SPL_ABS_W16(*tmpvector);
tmpvector++;
for (i = 1; i < length; i++)
{
absTemp = WEBRTC_SPL_ABS_W16(*tmpvector);
tmpvector++;
if (absTemp > tempMax)
{
tempMax = absTemp;
tempMaxIndex = i;
}
}
return tempMaxIndex;
}
// Maximum absolute value of word32 vector. // Maximum absolute value of word32 vector.
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length) int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
{ // Use uint for the local variables, to accommodate the value
WebRtc_UWord32 tempMax = 0; // of abs(0x80000000).
WebRtc_UWord32 absVal;
WebRtc_Word32 retval;
int i;
G_CONST WebRtc_Word32 *tmpvector = vector;
for (i = 0; i < length; i++) uint absolute = 0, maximum = 0;
{ int i = 0;
absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
if (absVal > tempMax) if (vector == NULL || length <= 0) {
{ return -1;
tempMax = absVal; }
}
tmpvector++; for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]);
if (absolute > maximum) {
maximum = absolute;
} }
retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX)); }
return retval;
maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
return (int32_t)maximum;
} }
// Maximum value of word16 vector. // Maximum value of word16 vector.
#ifndef XSCALE_OPT #ifndef XSCALE_OPT
WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
{ int16_t maximum = WEBRTC_SPL_WORD16_MIN;
WebRtc_Word16 tempMax; int i = 0;
WebRtc_Word16 i;
G_CONST WebRtc_Word16 *tmpvector = vector;
tempMax = *tmpvector++; if (vector == NULL || length <= 0) {
for (i = 1; i < length; i++) return maximum;
{ }
if (*tmpvector++ > tempMax)
tempMax = vector[i];
}
return tempMax;
}
#else
#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
#endif
// Index of maximum value in a word16 vector. for (i = 0; i < length; i++) {
WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length) if (vector[i] > maximum)
{ maximum = vector[i];
WebRtc_Word16 tempMax; }
WebRtc_Word16 tempMaxIndex = 0; return maximum;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word16 *tmpvector = vector;
tempMax = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ > tempMax)
{
tempMax = vector[i];
tempMaxIndex = i;
}
}
return tempMaxIndex;
} }
// Maximum value of word32 vector. // Maximum value of word32 vector.
#ifndef XSCALE_OPT int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) int32_t maximum = WEBRTC_SPL_WORD32_MIN;
{ int i = 0;
WebRtc_Word32 tempMax;
WebRtc_Word16 i;
G_CONST WebRtc_Word32 *tmpvector = vector;
tempMax = *tmpvector++; if (vector == NULL || length <= 0) {
for (i = 1; i < length; i++) return maximum;
{ }
if (*tmpvector++ > tempMax)
tempMax = vector[i]; for (i = 0; i < length; i++) {
} if (vector[i] > maximum)
return tempMax; maximum = vector[i];
}
return maximum;
} }
#else #else
#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
#pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build") #pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
#endif #endif
// Index of maximum value in a word32 vector. // Minimum value of word16 vector.
WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
{ int16_t minimum = WEBRTC_SPL_WORD16_MAX;
WebRtc_Word32 tempMax; int i = 0;
WebRtc_Word16 tempMaxIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word32 *tmpvector = vector;
tempMax = *tmpvector++; if (vector == NULL || length <= 0) {
for (i = 1; i < length; i++) return minimum;
{ }
if (*tmpvector++ > tempMax)
{ for (i = 0; i < length; i++) {
tempMax = vector[i]; if (vector[i] < minimum)
tempMaxIndex = i; minimum = vector[i];
} }
} return minimum;
return tempMaxIndex;
} }
// Minimum value of word16 vector. // Minimum value of word32 vector.
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length) int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
{ int32_t minimum = WEBRTC_SPL_WORD32_MAX;
WebRtc_Word16 tempMin; int i = 0;
WebRtc_Word16 i;
G_CONST WebRtc_Word16 *tmpvector = vector;
// Find the minimum value if (vector == NULL || length <= 0) {
tempMin = *tmpvector++; return minimum;
for (i = 1; i < length; i++) }
{
if (*tmpvector++ < tempMin) for (i = 0; i < length; i++) {
tempMin = (vector[i]); if (vector[i] < minimum)
minimum = vector[i];
}
return minimum;
}
#endif
// Index of maximum absolute value in a word16 vector.
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
// Use type int for local variables, to accommodate the value of abs(-32768).
int i = 0, absolute = 0, maximum = 0, index = 0;
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]);
if (absolute > maximum) {
maximum = absolute;
index = i;
} }
return tempMin; }
return index;
}
// Index of maximum value in a word16 vector.
//
// Walks |vector| from front to back and remembers the position of the
// largest sample seen so far. Because the comparison is strict, ties resolve
// to the earliest position. Returns -1 when |vector| is NULL or |length| is
// not positive.
int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) {
  int16_t best = WEBRTC_SPL_WORD16_MIN;
  int best_pos = 0;
  int pos;

  if (vector == NULL || length <= 0) {
    return -1;
  }

  for (pos = 0; pos < length; ++pos) {
    const int16_t sample = vector[pos];
    if (sample > best) {
      best = sample;
      best_pos = pos;
    }
  }

  return best_pos;
}
// Index of maximum value in a word32 vector.
int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) {
int i = 0, index = 0;
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
if (vector[i] > maximum) {
maximum = vector[i];
index = i;
}
}
return index;
} }
// Index of minimum value in a word16 vector. // Index of minimum value in a word16 vector.
#ifndef XSCALE_OPT #ifndef XSCALE_OPT
WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) {
{ int i = 0, index = 0;
WebRtc_Word16 tempMin; int16_t minimum = WEBRTC_SPL_WORD16_MAX;
WebRtc_Word16 tempMinIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word16* tmpvector = vector;
// Find index of smallest value if (vector == NULL || length <= 0) {
tempMin = *tmpvector++; return -1;
for (i = 1; i < length; i++) }
{
if (*tmpvector++ < tempMin) for (i = 0; i < length; i++) {
{ if (vector[i] < minimum) {
tempMin = vector[i]; minimum = vector[i];
tempMinIndex = i; index = i;
}
} }
return tempMinIndex; }
}
#else
#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
#endif
// Minimum value of word32 vector. return index;
WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
{
WebRtc_Word32 tempMin;
WebRtc_Word16 i;
G_CONST WebRtc_Word32 *tmpvector = vector;
// Find the minimum value
tempMin = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ < tempMin)
tempMin = (vector[i]);
}
return tempMin;
} }
// Index of minimum value in a word32 vector. // Index of minimum value in a word32 vector.
#ifndef XSCALE_OPT int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) {
WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) int i = 0, index = 0;
{ int32_t minimum = WEBRTC_SPL_WORD32_MAX;
WebRtc_Word32 tempMin;
WebRtc_Word16 tempMinIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word32 *tmpvector = vector;
// Find index of smallest value if (vector == NULL || length <= 0) {
tempMin = *tmpvector++; return -1;
for (i = 1; i < length; i++) }
{
if (*tmpvector++ < tempMin) for (i = 0; i < length; i++) {
{ if (vector[i] < minimum) {
tempMin = vector[i]; minimum = vector[i];
tempMinIndex = i; index = i;
}
} }
return tempMinIndex; }
return index;
} }
#else #else
#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
#pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build") #pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
#endif #endif

View File

@ -18,50 +18,288 @@
.arch armv7-a .arch armv7-a
.fpu neon .fpu neon
.global WebRtcSpl_MaxAbsValueW16 .global WebRtcSpl_MaxAbsValueW16
.global WebRtcSpl_MaxAbsValueW32
.global WebRtcSpl_MaxValueW16
.global WebRtcSpl_MaxValueW32
.global WebRtcSpl_MinValueW16
.global WebRtcSpl_MinValueW32
.align 2 .align 2
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
WebRtcSpl_MaxAbsValueW16: WebRtcSpl_MaxAbsValueW16:
.fnstart .fnstart
mov r2, #-1 @ Initialize the return value.
cmp r0, #0
beq END_MAX_ABS_VALUE_W16
cmp r1, #0
ble END_MAX_ABS_VALUE_W16
cmp r1, #8
blt LOOP_MAX_ABS_VALUE_W16
vmov.i16 q12, #0 vmov.i16 q12, #0
mov r2, #-1 @ Return value for the maximum. sub r1, #8 @ Counter for loops
cmp r1, #0 @ length
ble END @ Return -1 if length <= 0.
cmp r1, #7
ble LOOP_NO_UNROLLING
lsr r3, r1, #3 LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
lsl r3, #3 @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8. vld1.16 {q13}, [r0]!
sub r1, r3 @ Counter for LOOP_NO_UNROLLING: length % 8. subs r1, #8
LOOP_UNROLLED_BY_8:
vld1.16 {d26, d27}, [r0]!
subs r3, #8
vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768. vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768.
vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768. vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768.
bne LOOP_UNROLLED_BY_8 bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16
@ Find the maximum value in the Neon registers and move it to r2. @ Find the maximum value in the Neon registers and move it to r2.
vmax.u16 d24, d25 vmax.u16 d24, d25
vpmax.u16 d24, d24 vpmax.u16 d24, d24
vpmax.u16 d24, d24 vpmax.u16 d24, d24
cmp r1, #0 adds r1, #8
vmov.u16 r2, d24[0] vmov.u16 r2, d24[0]
ble END beq END_MAX_ABS_VALUE_W16
LOOP_NO_UNROLLING: LOOP_MAX_ABS_VALUE_W16:
ldrsh r3, [r0], #2 ldrsh r3, [r0], #2
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value. eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
sub r12, r12, r3, asr #31 sub r12, r12, r3, asr #31
cmp r2, r12 cmp r2, r12
movlt r2, r12 movlt r2, r12
subs r1, #1 subs r1, #1
bne LOOP_NO_UNROLLING bne LOOP_MAX_ABS_VALUE_W16
END: END_MAX_ABS_VALUE_W16:
cmp r2, #0x8000 @ Guard against the case for -32768. cmp r2, #0x8000 @ Guard against the case for -32768.
subeq r2, #1 subeq r2, #1
mov r0, r2 mov r0, r2
bx lr bx lr
.fnend .fnend
@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
@
@ Returns the largest absolute value found in |vector|, saturated to
@ 0x7FFFFFFF so that abs(0x80000000) still fits in the signed return type.
@ Returns -1 if vector == NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r1, r2 (running unsigned maximum), r3, r12, q11-q14, flags
WebRtcSpl_MaxAbsValueW32:
.fnstart

cmp r0, #0
moveq r0, #-1
beq EXIT @ Return -1 for a NULL pointer.
cmp r1, #0 @ length
movle r0, #-1
ble EXIT @ Return -1 if length <= 0.

@ The running maximum must be defined before the scalar loop: for
@ length < 8 the NEON loop (which otherwise produces r2) is skipped.
mov r2, #0
vmov.i32 q11, #0
vmov.i32 q12, #0
cmp r1, #8
blt LOOP_MAX_ABS_VALUE_W32

sub r1, #8 @ Counter for loops; goes negative once fewer than 8 remain.

LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
vld1.32 {q13, q14}, [r0]!
subs r1, #8 @ Counter for loops
vabs.s32 q13, q13 @ vabs doesn't change the value of 0x80000000.
vabs.s32 q14, q14
vmax.u32 q11, q13 @ Use u32 so we don't lose the value 0x80000000.
vmax.u32 q12, q14
bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32

@ Find the maximum value in the Neon registers and move it to r2.
vmax.u32 q12, q11
vmax.u32 d24, d25
vpmax.u32 d24, d24
adds r1, #8 @ r1 = remaining element count (0..7); sets Z if none.
vmov.u32 r2, d24[0] @ Does not touch the flags set by adds.
beq END_MAX_ABS_VALUE_W32

LOOP_MAX_ABS_VALUE_W32:
ldr r3, [r0], #4
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
sub r12, r12, r3, asr #31
cmp r2, r12
movcc r2, r12 @ Unsigned compare so 0x80000000 ranks highest.
subs r1, #1
bne LOOP_MAX_ABS_VALUE_W32

END_MAX_ABS_VALUE_W32:
mvn r0, #0x80000000 @ Guard against the case for 0x80000000.
cmp r2, r0
movcc r0, r2 @ r0 = min(r2, 0x7FFFFFFF), unsigned.

EXIT:
bx lr

.fnend
@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
@
@ Returns the largest sample in |vector|. Returns -32768
@ (WEBRTC_SPL_WORD16_MIN) if vector == NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result, sign-extended to 32 bits
@ Scratch: r1, r2 (running signed maximum), r3, q12, q13, flags
WebRtcSpl_MaxValueW16:
.fnstart

@ r2 must hold -32768 as a sign-extended 32-bit value: the scalar loop
@ compares it against ldrsh results, and r0 must be sign-extended on return.
mov r2, #0x8000 @ Initialize the return value.
sxth r2, r2 @ 0x00008000 -> 0xFFFF8000 (-32768).
cmp r0, #0
beq END_MAX_VALUE_W16
cmp r1, #0
ble END_MAX_VALUE_W16

vmov.i16 q12, #0x8000 @ Every lane starts at -32768.
cmp r1, #8
blt LOOP_MAX_VALUE_W16

sub r1, #8 @ Counter for loops; goes negative once fewer than 8 remain.

LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
vld1.16 {q13}, [r0]!
subs r1, #8
vmax.s16 q12, q13
bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16

@ Find the maximum value in the Neon registers and move it to r2.
vmax.s16 d24, d25
vpmax.s16 d24, d24
vpmax.s16 d24, d24
adds r1, #8 @ r1 = remaining element count (0..7); sets Z if none.
vmov.s16 r2, d24[0] @ Sign-extending move; does not touch the flags.
beq END_MAX_VALUE_W16

LOOP_MAX_VALUE_W16:
ldrsh r3, [r0], #2
cmp r2, r3
movlt r2, r3
subs r1, #1
bne LOOP_MAX_VALUE_W16

END_MAX_VALUE_W16:
mov r0, r2
bx lr

.fnend
@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
@
@ Returns the largest sample in |vector|. Returns 0x80000000 if
@ vector == NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r1, r2 (running signed maximum), r3, q11-q14, flags
WebRtcSpl_MaxValueW32:
.fnstart
mov r2, #0x80000000 @ Initialize the return value.
cmp r0, #0
beq END_MAX_VALUE_W32
cmp r1, #0
ble END_MAX_VALUE_W32
@ Two accumulators, every lane starting at the smallest signed value.
vmov.i32 q11, #0x80000000
vmov.i32 q12, #0x80000000
cmp r1, #8
blt LOOP_MAX_VALUE_W32
sub r1, #8 @ Counter for loops; goes negative once fewer than 8 remain.
@ NEON loop: 8 samples per iteration, 4 into each accumulator.
LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
vld1.32 {q13, q14}, [r0]!
subs r1, #8
vmax.s32 q11, q13
vmax.s32 q12, q14
bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
@ Find the maximum value in the Neon registers and move it to r2.
vmax.s32 q12, q11
vpmax.s32 d24, d25
vpmax.s32 d24, d24
adds r1, #8 @ r1 = remaining element count (0..7); sets Z if none.
vmov.s32 r2, d24[0]
beq END_MAX_VALUE_W32 @ Branches on the flags set by adds above.
@ Scalar loop: handles the remainder, or the whole vector when length < 8.
LOOP_MAX_VALUE_W32:
ldr r3, [r0], #4
cmp r2, r3
movlt r2, r3
subs r1, #1
bne LOOP_MAX_VALUE_W32
END_MAX_VALUE_W32:
mov r0, r2
bx lr
.fnend
@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
@
@ Returns the smallest sample in |vector|. Returns 0x7FFF if
@ vector == NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r1, r2 (running signed minimum), r3, q12, q13, flags
WebRtcSpl_MinValueW16:
.fnstart
movw r2, #0x7FFF @ Initialize the return value.
cmp r0, #0
beq END_MIN_VALUE_W16
cmp r1, #0
ble END_MIN_VALUE_W16
vmov.i16 q12, #0x7FFF @ Every lane starts at the largest signed value.
cmp r1, #8
blt LOOP_MIN_VALUE_W16
sub r1, #8 @ Counter for loops; goes negative once fewer than 8 remain.
@ NEON loop: 8 samples per iteration.
LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
vld1.16 {q13}, [r0]!
subs r1, #8
vmin.s16 q12, q13
bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
@ Find the minimum value in the Neon registers and move it to r2.
vmin.s16 d24, d25
vpmin.s16 d24, d24
vpmin.s16 d24, d24
adds r1, #8 @ r1 = remaining element count (0..7); sets Z if none.
vmov.s16 r2, d24[0]
sxth r2, r2 @ Keep r2 sign-extended for the signed compares below.
beq END_MIN_VALUE_W16 @ Branches on the flags set by adds above.
@ Scalar loop: handles the remainder, or the whole vector when length < 8.
LOOP_MIN_VALUE_W16:
ldrsh r3, [r0], #2
cmp r2, r3
movge r2, r3
subs r1, #1
bne LOOP_MIN_VALUE_W16
END_MIN_VALUE_W16:
mov r0, r2
bx lr
.fnend
@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
@
@ Returns the smallest sample in |vector|. Returns 0x7FFFFFFF if
@ vector == NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r1, r2 (running signed minimum), r3, q11-q14, flags
WebRtcSpl_MinValueW32:
.fnstart
mov r2, #0x7FFFFFFF @ Initialize the return value.
cmp r0, #0
beq END_MIN_VALUE_W32
cmp r1, #0
ble END_MIN_VALUE_W32
@ Two accumulators, every lane starting at the initial value held in r2.
vdup.32 q11, r2
vdup.32 q12, r2
cmp r1, #8
blt LOOP_MIN_VALUE_W32
sub r1, #8 @ Counter for loops; goes negative once fewer than 8 remain.
@ NEON loop: 8 samples per iteration, 4 into each accumulator.
LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
vld1.32 {q13, q14}, [r0]!
subs r1, #8
vmin.s32 q11, q13
vmin.s32 q12, q14
bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
@ Find the minimum value in the Neon registers and move it to r2.
vmin.s32 q12, q11
vpmin.s32 d24, d25
vpmin.s32 d24, d24
adds r1, #8 @ r1 = remaining element count (0..7); sets Z if none.
vmov.s32 r2, d24[0]
beq END_MIN_VALUE_W32 @ Branches on the flags set by adds above.
@ Scalar loop: handles the remainder, or the whole vector when length < 8.
LOOP_MIN_VALUE_W32:
ldr r3, [r0], #4
cmp r2, r3
movge r2, r3
subs r1, #1
bne LOOP_MIN_VALUE_W32
END_MIN_VALUE_W32:
mov r0, r2
bx lr
.fnend