diff --git a/src/common_audio/signal_processing/include/signal_processing_library.h b/src/common_audio/signal_processing/include/signal_processing_library.h index 348b5c8f1..4bcf68af9 100644 --- a/src/common_audio/signal_processing/include/signal_processing_library.h +++ b/src/common_audio/signal_processing/include/signal_processing_library.h @@ -34,6 +34,8 @@ #define WEBRTC_SPL_MAX_SEED_USED 0x80000000L #define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value #define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value +// TODO(kma/bjorn): For the next two macros, investigate how to correct the code +// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN. #define WEBRTC_SPL_ABS_W16(a) \ (((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a)) #define WEBRTC_SPL_ABS_W32(a) \ @@ -202,41 +204,130 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector, WebRtc_Word16 vector_length); // End: Copy and set operations. + // Minimum and maximum operations. Implementation in min_max_operations.c. // Returns the largest absolute value in a signed 16-bit vector. // // Input: -// - vector : Input vector. -// - length : Number of samples in vector. +// - vector : 16-bit input vector. +// - length : Number of samples in vector. // -// Return value : Maximum absolute value in vector. - +// Return value : Maximum absolute value in vector; +// or -1, if (vector == NULL || length <= 0). int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length); -WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector, - WebRtc_Word16 length); -WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector, - WebRtc_Word16 length); -WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector, - WebRtc_Word16 length); -WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, - WebRtc_Word16 length); +// Returns the largest absolute value in a signed 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. 
+// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector; +// or -1, if (vector == NULL || length <= 0). +int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length); + +// Returns the maximum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. +// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN +// is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible +// value and we can't catch errors purely based on it. +int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length); + +// Returns the maximum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. +// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN +// is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible +// value and we can't catch errors purely based on it. +int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length); + +// Returns the minimum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. +// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX +// is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible +// value and we can't catch errors purely based on it. +int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length); + +// Returns the minimum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. +// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX +// is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible +// value and we can't catch errors purely based on it. 
+int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length); + +// Returns the vector index to the largest absolute value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum absolute value in vector; +// or -1, if (vector == NULL || length <= 0). +int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length); + +// Returns the vector index to the maximum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector; +// or -1, if (vector == NULL || length <= 0). +int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length); + +// Returns the vector index to the maximum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector; +// or -1, if (vector == NULL || length <= 0). +int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length); + +// Returns the vector index to the minimum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the minimum value in vector; +// or -1, if (vector == NULL || length <= 0). +int WebRtcSpl_MinIndexW16(const int16_t* vector, int length); + +// Returns the vector index to the minimum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the minimum value in vector; +// or -1, if (vector == NULL || length <= 0).
+int WebRtcSpl_MinIndexW32(const int32_t* vector, int length); -WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, - WebRtc_Word16 length); -WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, - WebRtc_Word16 length); -WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, - WebRtc_Word16 length); -WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, - WebRtc_Word16 length); -WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector, - WebRtc_Word16 length); -WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, - WebRtc_Word16 length); // End: Minimum and maximum operations. + // Vector scaling operations. Implementation in vector_scaling_operations.c. // Description at bottom of file. void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector, @@ -849,81 +940,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // Return value : Number of samples in vector // -// -// WebRtcSpl_MinValueW16(...) -// WebRtcSpl_MinValueW32(...) -// -// Returns the minimum value of a vector -// -// Input: -// - vector : Input vector -// - vector_length : Number of samples in vector -// -// Return value : Minimum sample value in vector -// - -// -// WebRtcSpl_MaxValueW16(...) -// WebRtcSpl_MaxValueW32(...) -// -// Returns the maximum value of a vector -// -// Input: -// - vector : Input vector -// - vector_length : Number of samples in vector -// -// Return value : Maximum sample value in vector -// - -// WebRtcSpl_MaxAbsValueW32(...) -// -// Returns the largest absolute value of a vector -// -// Input: -// - vector : Input vector -// - vector_length : Number of samples in vector -// -// Return value : Maximum absolute value in vector -// - -// -// WebRtcSpl_MaxAbsIndexW16(...) 
-// -// Returns the vector index to the largest absolute value of a vector -// -// Input: -// - vector : Input vector -// - vector_length : Number of samples in vector -// -// Return value : Index to maximum absolute value in vector -// - -// -// WebRtcSpl_MinIndexW16(...) -// WebRtcSpl_MinIndexW32(...) -// -// Returns the vector index to the minimum sample value of a vector -// -// Input: -// - vector : Input vector -// - vector_length : Number of samples in vector -// -// Return value : Index to minimum sample value in vector -// - -// -// WebRtcSpl_MaxIndexW16(...) -// WebRtcSpl_MaxIndexW32(...) -// -// Returns the vector index to the maximum sample value of a vector -// -// Input: -// - vector : Input vector -// - vector_length : Number of samples in vector -// -// Return value : Index to maximum sample value in vector -// - // // WebRtcSpl_VectorBitShiftW16(...) // WebRtcSpl_VectorBitShiftW32(...) @@ -1627,7 +1643,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // WebRtc_Word16 WebRtcSpl_SatW32ToW16(...) // // This function saturates a 32-bit word into a 16-bit word. -// +// // Input: // - value32 : The value of a 32-bit word. // @@ -1639,7 +1655,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // // This function multiply a 16-bit word by a 16-bit word, and accumulate this // value to a 32-bit integer. -// +// // Input: // - a : The value of the first 16-bit word. // - b : The value of the second 16-bit word. 
diff --git a/src/common_audio/signal_processing/min_max_operations.c b/src/common_audio/signal_processing/min_max_operations.c index 0d9bb8ce1..2ea743ae3 100644 --- a/src/common_audio/signal_processing/min_max_operations.c +++ b/src/common_audio/signal_processing/min_max_operations.c @@ -11,32 +11,35 @@ /* * This file contains the implementation of functions * WebRtcSpl_MaxAbsValueW16() - * WebRtcSpl_MaxAbsIndexW16() * WebRtcSpl_MaxAbsValueW32() * WebRtcSpl_MaxValueW16() - * WebRtcSpl_MaxIndexW16() * WebRtcSpl_MaxValueW32() - * WebRtcSpl_MaxIndexW32() * WebRtcSpl_MinValueW16() - * WebRtcSpl_MinIndexW16() * WebRtcSpl_MinValueW32() + * WebRtcSpl_MaxAbsIndexW16() + * WebRtcSpl_MaxIndexW16() + * WebRtcSpl_MaxIndexW32() + * WebRtcSpl_MinIndexW16() * WebRtcSpl_MinIndexW32() * - * The description header can be found in signal_processing_library.h. - * */ #include "signal_processing_library.h" #include +// TODO(bjorn/kma): Consolidate function pairs (e.g. combine +// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.) + #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) // Maximum absolute value of word16 vector. int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) { - int i = 0; - int absolute = 0; - int maximum = -1; // Return -1 if length <= 0. + int i = 0, absolute = 0, maximum = 0; + + if (vector == NULL || length <= 0) { + return -1; + } for (i = 0; i < length; i++) { absolute = abs((int)vector[i]); @@ -54,214 +57,201 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) { return (int16_t)maximum; } -#endif - -// Index of maximum absolute value in a word16 vector. 
// Maximum absolute value of word32 vector.
//
// Input:
//      - vector : 32-bit input vector.
//      - length : Number of samples in vector.
//
// Return value  : Largest absolute value found in |vector|, saturated to
//                 0x7FFFFFFF (an input sample of 0x80000000 therefore yields
//                 0x7FFFFFFF); or -1, if (vector == NULL || length <= 0).
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
  // Largest value representable in the int32_t return type.
  const uint32_t kMaxWord32 = 0x7FFFFFFFu;
  // Unsigned accumulator, so that |0x80000000| == 0x80000000 fits.
  uint32_t maximum = 0;
  int i = 0;

  if (vector == NULL || length <= 0) {
    return -1;
  }

  for (i = 0; i < length; i++) {
    // Negate in unsigned arithmetic. abs() on the most negative value is
    // undefined behavior, whereas 0u - 0x80000000u is well defined and equals
    // 0x80000000u.
    const uint32_t bits = (uint32_t)vector[i];
    const uint32_t absolute = (vector[i] < 0) ? (0u - bits) : bits;

    if (absolute > maximum) {
      maximum = absolute;
    }
  }

  // Saturate so the conversion to the signed return type cannot overflow.
  if (maximum > kMaxWord32) {
    maximum = kMaxWord32;
  }

  return (int32_t)maximum;
}

// Maximum value of word16 vector.
#ifndef XSCALE_OPT -WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) -{ - WebRtc_Word16 tempMax; - WebRtc_Word16 i; - G_CONST WebRtc_Word16 *tmpvector = vector; +int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + int i = 0; - tempMax = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ > tempMax) - tempMax = vector[i]; - } - return tempMax; -} -#else -#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build") -#endif + if (vector == NULL || length <= 0) { + return maximum; + } -// Index of maximum value in a word16 vector. -WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length) -{ - WebRtc_Word16 tempMax; - WebRtc_Word16 tempMaxIndex = 0; - WebRtc_Word16 i = 0; - G_CONST WebRtc_Word16 *tmpvector = vector; - - tempMax = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ > tempMax) - { - tempMax = vector[i]; - tempMaxIndex = i; - } - } - return tempMaxIndex; + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; } // Maximum value of word32 vector. 
-#ifndef XSCALE_OPT -WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) -{ - WebRtc_Word32 tempMax; - WebRtc_Word16 i; - G_CONST WebRtc_Word32 *tmpvector = vector; +int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + int i = 0; - tempMax = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ > tempMax) - tempMax = vector[i]; - } - return tempMax; + if (vector == NULL || length <= 0) { + return maximum; + } + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; } #else +#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build") #pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build") #endif -// Index of maximum value in a word32 vector. -WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) -{ - WebRtc_Word32 tempMax; - WebRtc_Word16 tempMaxIndex = 0; - WebRtc_Word16 i = 0; - G_CONST WebRtc_Word32 *tmpvector = vector; +// Minimum value of word16 vector. +int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int i = 0; - tempMax = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ > tempMax) - { - tempMax = vector[i]; - tempMaxIndex = i; - } - } - return tempMaxIndex; + if (vector == NULL || length <= 0) { + return minimum; + } + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; } -// Minimum value of word16 vector. -WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length) -{ - WebRtc_Word16 tempMin; - WebRtc_Word16 i; - G_CONST WebRtc_Word16 *tmpvector = vector; +// Minimum value of word32 vector. 
+int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + int i = 0; - // Find the minimum value - tempMin = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ < tempMin) - tempMin = (vector[i]); + if (vector == NULL || length <= 0) { + return minimum; + } + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} +#endif + + +// Index of maximum absolute value in a word16 vector. +int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) { + // Use type int for local variables, to accomodate the value of abs(-32768). + + int i = 0, absolute = 0, maximum = 0, index = 0; + + if (vector == NULL || length <= 0) { + return -1; + } + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + index = i; } - return tempMin; + } + + return index; +} + +// Index of maximum value in a word16 vector. +int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) { + int i = 0, index = 0; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + + if (vector == NULL || length <= 0) { + return -1; + } + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word32 vector. +int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) { + int i = 0, index = 0; + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + + if (vector == NULL || length <= 0) { + return -1; + } + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; } // Index of minimum value in a word16 vector. 
#ifndef XSCALE_OPT -WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) -{ - WebRtc_Word16 tempMin; - WebRtc_Word16 tempMinIndex = 0; - WebRtc_Word16 i = 0; - G_CONST WebRtc_Word16* tmpvector = vector; +int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) { + int i = 0, index = 0; + int16_t minimum = WEBRTC_SPL_WORD16_MAX; - // Find index of smallest value - tempMin = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ < tempMin) - { - tempMin = vector[i]; - tempMinIndex = i; - } + if (vector == NULL || length <= 0) { + return -1; + } + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; } - return tempMinIndex; -} -#else -#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build") -#endif + } -// Minimum value of word32 vector. -WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length) -{ - WebRtc_Word32 tempMin; - WebRtc_Word16 i; - G_CONST WebRtc_Word32 *tmpvector = vector; - - // Find the minimum value - tempMin = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ < tempMin) - tempMin = (vector[i]); - } - return tempMin; + return index; } // Index of minimum value in a word32 vector. 
-#ifndef XSCALE_OPT -WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) -{ - WebRtc_Word32 tempMin; - WebRtc_Word16 tempMinIndex = 0; - WebRtc_Word16 i = 0; - G_CONST WebRtc_Word32 *tmpvector = vector; +int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) { + int i = 0, index = 0; + int32_t minimum = WEBRTC_SPL_WORD32_MAX; - // Find index of smallest value - tempMin = *tmpvector++; - for (i = 1; i < length; i++) - { - if (*tmpvector++ < tempMin) - { - tempMin = vector[i]; - tempMinIndex = i; - } + if (vector == NULL || length <= 0) { + return -1; + } + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; } - return tempMinIndex; + } + + return index; } + #else +#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build") #pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build") #endif diff --git a/src/common_audio/signal_processing/min_max_operations_neon.s b/src/common_audio/signal_processing/min_max_operations_neon.s index a131160fa..01831ef4a 100644 --- a/src/common_audio/signal_processing/min_max_operations_neon.s +++ b/src/common_audio/signal_processing/min_max_operations_neon.s @@ -18,50 +18,288 @@ .arch armv7-a .fpu neon .global WebRtcSpl_MaxAbsValueW16 +.global WebRtcSpl_MaxAbsValueW32 +.global WebRtcSpl_MaxValueW16 +.global WebRtcSpl_MaxValueW32 +.global WebRtcSpl_MinValueW16 +.global WebRtcSpl_MinValueW32 .align 2 +@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length); WebRtcSpl_MaxAbsValueW16: .fnstart + mov r2, #-1 @ Initialize the return value. + cmp r0, #0 + beq END_MAX_ABS_VALUE_W16 + cmp r1, #0 + ble END_MAX_ABS_VALUE_W16 + + cmp r1, #8 + blt LOOP_MAX_ABS_VALUE_W16 + vmov.i16 q12, #0 - mov r2, #-1 @ Return value for the maximum. - cmp r1, #0 @ length - ble END @ Return -1 if length <= 0. 
- cmp r1, #7 - ble LOOP_NO_UNROLLING + sub r1, #8 @ Counter for loops - lsr r3, r1, #3 - lsl r3, #3 @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8. - sub r1, r3 @ Counter for LOOP_NO_UNROLLING: length % 8. - -LOOP_UNROLLED_BY_8: - vld1.16 {d26, d27}, [r0]! - subs r3, #8 +LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16: + vld1.16 {q13}, [r0]! + subs r1, #8 vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768. vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768. - bne LOOP_UNROLLED_BY_8 + bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16 @ Find the maximum value in the Neon registers and move it to r2. vmax.u16 d24, d25 vpmax.u16 d24, d24 vpmax.u16 d24, d24 - cmp r1, #0 + adds r1, #8 vmov.u16 r2, d24[0] - ble END + beq END_MAX_ABS_VALUE_W16 -LOOP_NO_UNROLLING: +LOOP_MAX_ABS_VALUE_W16: ldrsh r3, [r0], #2 eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value. sub r12, r12, r3, asr #31 cmp r2, r12 movlt r2, r12 subs r1, #1 - bne LOOP_NO_UNROLLING + bne LOOP_MAX_ABS_VALUE_W16 -END: +END_MAX_ABS_VALUE_W16: cmp r2, #0x8000 @ Guard against the case for -32768. subeq r2, #1 mov r0, r2 bx lr .fnend + +@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length); +WebRtcSpl_MaxAbsValueW32: +.fnstart + + cmp r0, #0 + moveq r0, #-1 + beq EXIT @ Return -1 for a NULL pointer. + cmp r1, #0 @ length + movle r0, #-1 + ble EXIT @ Return -1 if length <= 0. + + vmov.i32 q11, #0 + vmov.i32 q12, #0 + cmp r1, #8 + blt LOOP_MAX_ABS_VALUE_W32 + + sub r1, #8 @ Counter for loops + +LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32: + vld1.32 {q13, q14}, [r0]! + subs r1, #8 @ Counter for loops + vabs.s32 q13, q13 @ vabs doesn't change the value of 0x80000000. + vabs.s32 q14, q14 + vmax.u32 q11, q13 @ Use u32 so we don't lose the value 0x80000000. + vmax.u32 q12, q14 + bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32 + + @ Find the maximum value in the Neon registers and move it to r2. 
+ vmax.u32 q12, q11 + vmax.u32 d24, d25 + vpmax.u32 d24, d24 + adds r1, #8 + vmov.u32 r2, d24[0] + beq END_MAX_ABS_VALUE_W32 + +LOOP_MAX_ABS_VALUE_W32: + ldr r3, [r0], #4 + eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value. + sub r12, r12, r3, asr #31 + cmp r2, r12 + movcc r2, r12 + subs r1, #1 + bne LOOP_MAX_ABS_VALUE_W32 + +END_MAX_ABS_VALUE_W32: + mvn r0, #0x80000000 @ Guard against the case for 0x80000000. + cmp r2, r0 + movcc r0, r2 + +EXIT: + bx lr + +.fnend + +@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length); +WebRtcSpl_MaxValueW16: +.fnstart + + mov r2, #0x8000 @ Initialize the return value. + cmp r0, #0 + beq END_MAX_VALUE_W16 + cmp r1, #0 + ble END_MAX_VALUE_W16 + + vmov.i16 q12, #0x8000 + cmp r1, #8 + blt LOOP_MAX_VALUE_W16 + + sub r1, #8 @ Counter for loops + +LOOP_UNROLLED_BY_8_MAX_VALUE_W16: + vld1.16 {q13}, [r0]! + subs r1, #8 + vmax.s16 q12, q13 + bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16 + + @ Find the maximum value in the Neon registers and move it to r2. + vmax.s16 d24, d25 + vpmax.s16 d24, d24 + vpmax.s16 d24, d24 + adds r1, #8 + vmov.u16 r2, d24[0] + beq END_MAX_VALUE_W16 + +LOOP_MAX_VALUE_W16: + ldrsh r3, [r0], #2 + cmp r2, r3 + movlt r2, r3 + subs r1, #1 + bne LOOP_MAX_VALUE_W16 + +END_MAX_VALUE_W16: + mov r0, r2 + bx lr + +.fnend + +@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length); +WebRtcSpl_MaxValueW32: +.fnstart + + mov r2, #0x80000000 @ Initialize the return value. + cmp r0, #0 + beq END_MAX_VALUE_W32 + cmp r1, #0 + ble END_MAX_VALUE_W32 + + vmov.i32 q11, #0x80000000 + vmov.i32 q12, #0x80000000 + cmp r1, #8 + blt LOOP_MAX_VALUE_W32 + + sub r1, #8 @ Counter for loops + +LOOP_UNROLLED_BY_8_MAX_VALUE_W32: + vld1.32 {q13, q14}, [r0]! + subs r1, #8 + vmax.s32 q11, q13 + vmax.s32 q12, q14 + bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32 + + @ Find the maximum value in the Neon registers and move it to r2. 
+ vmax.s32 q12, q11 + vpmax.s32 d24, d25 + vpmax.s32 d24, d24 + adds r1, #8 + vmov.s32 r2, d24[0] + beq END_MAX_VALUE_W32 + +LOOP_MAX_VALUE_W32: + ldr r3, [r0], #4 + cmp r2, r3 + movlt r2, r3 + subs r1, #1 + bne LOOP_MAX_VALUE_W32 + +END_MAX_VALUE_W32: + mov r0, r2 + bx lr + +.fnend + +@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length); +WebRtcSpl_MinValueW16: +.fnstart + + movw r2, #0x7FFF @ Initialize the return value. + cmp r0, #0 + beq END_MIN_VALUE_W16 + cmp r1, #0 + ble END_MIN_VALUE_W16 + + vmov.i16 q12, #0x7FFF + cmp r1, #8 + blt LOOP_MIN_VALUE_W16 + + sub r1, #8 @ Counter for loops + +LOOP_UNROLLED_BY_8_MIN_VALUE_W16: + vld1.16 {q13}, [r0]! + subs r1, #8 + vmin.s16 q12, q13 + bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16 + + @ Find the maximum value in the Neon registers and move it to r2. + vmin.s16 d24, d25 + vpmin.s16 d24, d24 + vpmin.s16 d24, d24 + adds r1, #8 + vmov.s16 r2, d24[0] + sxth r2, r2 + beq END_MIN_VALUE_W16 + +LOOP_MIN_VALUE_W16: + ldrsh r3, [r0], #2 + cmp r2, r3 + movge r2, r3 + subs r1, #1 + bne LOOP_MIN_VALUE_W16 + +END_MIN_VALUE_W16: + mov r0, r2 + bx lr + +.fnend + +@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length); +WebRtcSpl_MinValueW32: +.fnstart + + mov r2, #0x7FFFFFFF @ Initialize the return value. + cmp r0, #0 + beq END_MIN_VALUE_W32 + cmp r1, #0 + ble END_MIN_VALUE_W32 + + vdup.32 q11, r2 + vdup.32 q12, r2 + cmp r1, #8 + blt LOOP_MIN_VALUE_W32 + + sub r1, #8 @ Counter for loops + +LOOP_UNROLLED_BY_8_MIN_VALUE_W32: + vld1.32 {q13, q14}, [r0]! + subs r1, #8 + vmin.s32 q11, q13 + vmin.s32 q12, q14 + bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32 + + @ Find the maximum value in the Neon registers and move it to r2. + vmin.s32 q12, q11 + vpmin.s32 d24, d25 + vpmin.s32 d24, d24 + adds r1, #8 + vmov.s32 r2, d24[0] + beq END_MIN_VALUE_W32 + +LOOP_MIN_VALUE_W32: + ldr r3, [r0], #4 + cmp r2, r3 + movge r2, r3 + subs r1, #1 + bne LOOP_MIN_VALUE_W32 + +END_MIN_VALUE_W32: + mov r0, r2 + bx lr + +.fnend