Optimizations on several SPL min max operations in ARM, and refactoring in C.

Touched C and assembly functions are tested with a new unit test which is not in the code base yet.
Review URL: https://webrtc-codereview.appspot.com/428004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1974 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2012-04-02 03:55:20 +00:00
parent f3bbc3e5b3
commit 95c3d408f5
3 changed files with 544 additions and 300 deletions

View File

@ -34,6 +34,8 @@
#define WEBRTC_SPL_MAX_SEED_USED 0x80000000L
#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value
#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value
// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
#define WEBRTC_SPL_ABS_W16(a) \
(((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
#define WEBRTC_SPL_ABS_W32(a) \
@ -202,41 +204,130 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
WebRtc_Word16 vector_length);
// End: Copy and set operations.
// Minimum and maximum operations. Implementation in min_max_operations.c.
// Returns the largest absolute value in a signed 16-bit vector.
//
// Input:
// - vector : Input vector.
// - length : Number of samples in vector.
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector.
// Return value : Maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
// Returns the largest absolute value in a signed 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
// Returns the maximum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
// is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
// value and we can't catch errors purely based on it.
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
// Returns the maximum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
// is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
// value and we can't catch errors purely based on it.
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
// Returns the minimum value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
// is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
// value and we can't catch errors purely based on it.
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
// Returns the minimum value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
// is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
// value and we can't catch errors purely based on it.
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
// Returns the vector index to the largest absolute value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length);
// Returns the vector index to the maximum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length);
// Returns the vector index to the maximum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the maximum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length);
// Returns the vector index to the minimum sample value of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MinIndexW16(const int16_t* vector, int length);
// Returns the vector index to the minimum sample value of a 32-bit vector.
//
// Input:
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Index to the minimum value in vector;
// or -1, if (vector == NULL || length <= 0).
int WebRtcSpl_MinIndexW32(const int32_t* vector, int length);
WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector,
WebRtc_Word16 length);
WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector,
WebRtc_Word16 length);
// End: Minimum and maximum operations.
// Vector scaling operations. Implementation in vector_scaling_operations.c.
// Description at bottom of file.
void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
@ -849,81 +940,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// Return value : Number of samples in vector
//
//
// WebRtcSpl_MinValueW16(...)
// WebRtcSpl_MinValueW32(...)
//
// Returns the minimum value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Minimum sample value in vector
//
//
// WebRtcSpl_MaxValueW16(...)
// WebRtcSpl_MaxValueW32(...)
//
// Returns the maximum value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Maximum sample value in vector
//
// WebRtcSpl_MaxAbsValueW32(...)
//
// Returns the largest absolute value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Maximum absolute value in vector
//
//
// WebRtcSpl_MaxAbsIndexW16(...)
//
// Returns the vector index to the largest absolute value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Index to maximum absolute value in vector
//
//
// WebRtcSpl_MinIndexW16(...)
// WebRtcSpl_MinIndexW32(...)
//
// Returns the vector index to the minimum sample value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Index to minimum sample value in vector
//
//
// WebRtcSpl_MaxIndexW16(...)
// WebRtcSpl_MaxIndexW32(...)
//
// Returns the vector index to the maximum sample value of a vector
//
// Input:
// - vector : Input vector
// - vector_length : Number of samples in vector
//
// Return value : Index to maximum sample value in vector
//
//
// WebRtcSpl_VectorBitShiftW16(...)
// WebRtcSpl_VectorBitShiftW32(...)
@ -1627,7 +1643,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// WebRtc_Word16 WebRtcSpl_SatW32ToW16(...)
//
// This function saturates a 32-bit word into a 16-bit word.
//
//
// Input:
// - value32 : The value of a 32-bit word.
//
@ -1639,7 +1655,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
//
// This function multiply a 16-bit word by a 16-bit word, and accumulate this
// value to a 32-bit integer.
//
//
// Input:
// - a : The value of the first 16-bit word.
// - b : The value of the second 16-bit word.

View File

@ -11,32 +11,35 @@
/*
* This file contains the implementation of functions
* WebRtcSpl_MaxAbsValueW16()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxAbsValueW32()
* WebRtcSpl_MaxValueW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxValueW32()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinValueW16()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinValueW32()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinIndexW32()
*
* The description header can be found in signal_processing_library.h.
*
*/
#include "signal_processing_library.h"
#include <stdlib.h>
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
// Maximum absolute value of word16 vector.
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
int i = 0;
int absolute = 0;
int maximum = -1; // Return -1 if length <= 0.
int i = 0, absolute = 0, maximum = 0;
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]);
@ -54,214 +57,201 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
return (int16_t)maximum;
}
#endif
// Index of maximum absolute value in a word16 vector.
WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
{
WebRtc_Word16 tempMax;
WebRtc_Word16 absTemp;
WebRtc_Word16 tempMaxIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word16 *tmpvector = vector;
tempMax = WEBRTC_SPL_ABS_W16(*tmpvector);
tmpvector++;
for (i = 1; i < length; i++)
{
absTemp = WEBRTC_SPL_ABS_W16(*tmpvector);
tmpvector++;
if (absTemp > tempMax)
{
tempMax = absTemp;
tempMaxIndex = i;
}
}
return tempMaxIndex;
}
// Maximum absolute value of word32 vector.
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
{
WebRtc_UWord32 tempMax = 0;
WebRtc_UWord32 absVal;
WebRtc_Word32 retval;
int i;
G_CONST WebRtc_Word32 *tmpvector = vector;
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
// Use uint for the local variables, to accommodate the value
// of abs(0x80000000).
for (i = 0; i < length; i++)
{
absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
if (absVal > tempMax)
{
tempMax = absVal;
}
tmpvector++;
uint absolute = 0, maximum = 0;
int i = 0;
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]);
if (absolute > maximum) {
maximum = absolute;
}
retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX));
return retval;
}
maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
return (int32_t)maximum;
}
// Maximum value of word16 vector.
#ifndef XSCALE_OPT
WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
{
WebRtc_Word16 tempMax;
WebRtc_Word16 i;
G_CONST WebRtc_Word16 *tmpvector = vector;
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
int i = 0;
tempMax = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ > tempMax)
tempMax = vector[i];
}
return tempMax;
}
#else
#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
#endif
if (vector == NULL || length <= 0) {
return maximum;
}
// Index of maximum value in a word16 vector.
WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
{
WebRtc_Word16 tempMax;
WebRtc_Word16 tempMaxIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word16 *tmpvector = vector;
tempMax = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ > tempMax)
{
tempMax = vector[i];
tempMaxIndex = i;
}
}
return tempMaxIndex;
for (i = 0; i < length; i++) {
if (vector[i] > maximum)
maximum = vector[i];
}
return maximum;
}
// Maximum value of word32 vector.
#ifndef XSCALE_OPT
WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
{
WebRtc_Word32 tempMax;
WebRtc_Word16 i;
G_CONST WebRtc_Word32 *tmpvector = vector;
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
int i = 0;
tempMax = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ > tempMax)
tempMax = vector[i];
}
return tempMax;
if (vector == NULL || length <= 0) {
return maximum;
}
for (i = 0; i < length; i++) {
if (vector[i] > maximum)
maximum = vector[i];
}
return maximum;
}
#else
#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
#pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
#endif
// Index of maximum value in a word32 vector.
WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
{
WebRtc_Word32 tempMax;
WebRtc_Word16 tempMaxIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word32 *tmpvector = vector;
// Minimum value of word16 vector.
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
int i = 0;
tempMax = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ > tempMax)
{
tempMax = vector[i];
tempMaxIndex = i;
}
}
return tempMaxIndex;
if (vector == NULL || length <= 0) {
return minimum;
}
for (i = 0; i < length; i++) {
if (vector[i] < minimum)
minimum = vector[i];
}
return minimum;
}
// Minimum value of word16 vector.
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
{
WebRtc_Word16 tempMin;
WebRtc_Word16 i;
G_CONST WebRtc_Word16 *tmpvector = vector;
// Minimum value of word32 vector.
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
int i = 0;
// Find the minimum value
tempMin = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ < tempMin)
tempMin = (vector[i]);
if (vector == NULL || length <= 0) {
return minimum;
}
for (i = 0; i < length; i++) {
if (vector[i] < minimum)
minimum = vector[i];
}
return minimum;
}
#endif
// Index of maximum absolute value in a word16 vector.
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
// Use type int for local variables, to accommodate the value of abs(-32768).
int i = 0, absolute = 0, maximum = 0, index = 0;
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]);
if (absolute > maximum) {
maximum = absolute;
index = i;
}
return tempMin;
}
return index;
}
// Position of the largest sample in a 16-bit vector.
//
// Returns the index of the first occurrence of the maximum value, or -1 when
// |vector| is NULL or |length| is not positive.
int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) {
  int16_t best = WEBRTC_SPL_WORD16_MIN;
  int best_pos = 0;
  int pos = 0;

  if (vector == NULL) {
    return -1;
  }
  if (length <= 0) {
    return -1;
  }

  // The strict comparison keeps the earliest position when the maximum
  // repeats, and leaves position 0 selected if every sample equals the seed.
  while (pos < length) {
    const int16_t sample = vector[pos];
    if (sample > best) {
      best = sample;
      best_pos = pos;
    }
    ++pos;
  }

  return best_pos;
}
// Position of the largest sample in a 32-bit vector.
//
// Returns the index of the first occurrence of the maximum value, or -1 when
// |vector| is NULL or |length| is not positive.
int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) {
  int32_t best = WEBRTC_SPL_WORD32_MIN;
  int best_pos = 0;
  int pos = 0;

  if (vector == NULL) {
    return -1;
  }
  if (length <= 0) {
    return -1;
  }

  // The strict comparison keeps the earliest position when the maximum
  // repeats, and leaves position 0 selected if every sample equals the seed.
  while (pos < length) {
    const int32_t sample = vector[pos];
    if (sample > best) {
      best = sample;
      best_pos = pos;
    }
    ++pos;
  }

  return best_pos;
}
// Index of minimum value in a word16 vector.
#ifndef XSCALE_OPT
WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
{
WebRtc_Word16 tempMin;
WebRtc_Word16 tempMinIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word16* tmpvector = vector;
int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) {
int i = 0, index = 0;
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
// Find index of smallest value
tempMin = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ < tempMin)
{
tempMin = vector[i];
tempMinIndex = i;
}
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
if (vector[i] < minimum) {
minimum = vector[i];
index = i;
}
return tempMinIndex;
}
#else
#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
#endif
}
// Minimum value of word32 vector.
WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
{
WebRtc_Word32 tempMin;
WebRtc_Word16 i;
G_CONST WebRtc_Word32 *tmpvector = vector;
// Find the minimum value
tempMin = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ < tempMin)
tempMin = (vector[i]);
}
return tempMin;
return index;
}
// Index of minimum value in a word32 vector.
#ifndef XSCALE_OPT
WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
{
WebRtc_Word32 tempMin;
WebRtc_Word16 tempMinIndex = 0;
WebRtc_Word16 i = 0;
G_CONST WebRtc_Word32 *tmpvector = vector;
int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) {
int i = 0, index = 0;
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
// Find index of smallest value
tempMin = *tmpvector++;
for (i = 1; i < length; i++)
{
if (*tmpvector++ < tempMin)
{
tempMin = vector[i];
tempMinIndex = i;
}
if (vector == NULL || length <= 0) {
return -1;
}
for (i = 0; i < length; i++) {
if (vector[i] < minimum) {
minimum = vector[i];
index = i;
}
return tempMinIndex;
}
return index;
}
#else
#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
#pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
#endif

View File

@ -18,50 +18,288 @@
.arch armv7-a
.fpu neon
.global WebRtcSpl_MaxAbsValueW16
.global WebRtcSpl_MaxAbsValueW32
.global WebRtcSpl_MaxValueW16
.global WebRtcSpl_MaxValueW32
.global WebRtcSpl_MinValueW16
.global WebRtcSpl_MinValueW32
.align 2
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
WebRtcSpl_MaxAbsValueW16:
.fnstart
mov r2, #-1 @ Initialize the return value.
cmp r0, #0
beq END_MAX_ABS_VALUE_W16
cmp r1, #0
ble END_MAX_ABS_VALUE_W16
cmp r1, #8
blt LOOP_MAX_ABS_VALUE_W16
vmov.i16 q12, #0
mov r2, #-1 @ Return value for the maximum.
cmp r1, #0 @ length
ble END @ Return -1 if length <= 0.
cmp r1, #7
ble LOOP_NO_UNROLLING
sub r1, #8 @ Counter for loops
lsr r3, r1, #3
lsl r3, #3 @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.
sub r1, r3 @ Counter for LOOP_NO_UNROLLING: length % 8.
LOOP_UNROLLED_BY_8:
vld1.16 {d26, d27}, [r0]!
subs r3, #8
LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
vld1.16 {q13}, [r0]!
subs r1, #8
vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768.
vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768.
bne LOOP_UNROLLED_BY_8
bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16
@ Find the maximum value in the Neon registers and move it to r2.
vmax.u16 d24, d25
vpmax.u16 d24, d24
vpmax.u16 d24, d24
cmp r1, #0
adds r1, #8
vmov.u16 r2, d24[0]
ble END
beq END_MAX_ABS_VALUE_W16
LOOP_NO_UNROLLING:
LOOP_MAX_ABS_VALUE_W16:
ldrsh r3, [r0], #2
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
sub r12, r12, r3, asr #31
cmp r2, r12
movlt r2, r12
subs r1, #1
bne LOOP_NO_UNROLLING
bne LOOP_MAX_ABS_VALUE_W16
END:
END_MAX_ABS_VALUE_W16:
cmp r2, #0x8000 @ Guard against the case for -32768.
subeq r2, #1
mov r0, r2
bx lr
.fnend
@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
@
@ Returns the largest absolute value found in a signed 32-bit vector.
@ In:  r0 = vector, r1 = length (number of samples).
@ Out: r0 = max(|vector[i]|) saturated to 0x7FFFFFFF,
@           or -1 when vector == NULL or length <= 0.
@ Scratch: r2 (running unsigned maximum), r3, r12, q11-q14, condition flags.
WebRtcSpl_MaxAbsValueW32:
.fnstart
cmp r0, #0
moveq r0, #-1
beq EXIT @ Return -1 for a NULL pointer.
cmp r1, #0 @ length
movle r0, #-1
ble EXIT @ Return -1 if length <= 0.
@ The running maximum must be defined before the scalar loop: when
@ length < 8 the Neon path (the only other writer of r2) is skipped.
mov r2, #0
vmov.i32 q11, #0
vmov.i32 q12, #0
cmp r1, #8
blt LOOP_MAX_ABS_VALUE_W32
sub r1, #8 @ Bias the counter so "subs/bge" stops after the last full group of 8.
LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
vld1.32 {q13, q14}, [r0]! @ Load 8 samples and advance the pointer.
subs r1, #8 @ Counter for loops
vabs.s32 q13, q13 @ vabs doesn't change the value of 0x80000000.
vabs.s32 q14, q14
vmax.u32 q11, q13 @ Use u32 so we don't lose the value 0x80000000.
vmax.u32 q12, q14
bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
@ Find the maximum value in the Neon registers and move it to r2.
vmax.u32 q12, q11
vmax.u32 d24, d25
vpmax.u32 d24, d24
adds r1, #8 @ Undo the bias: r1 = samples left over (0..7); sets Z for the beq below.
vmov.u32 r2, d24[0] @ Does not touch the flags set by "adds".
beq END_MAX_ABS_VALUE_W32
@ Scalar loop for the remaining samples (or for all of them if length < 8).
LOOP_MAX_ABS_VALUE_W32:
ldr r3, [r0], #4
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
sub r12, r12, r3, asr #31
cmp r2, r12
movcc r2, r12 @ Unsigned compare, so 0x80000000 ranks as the largest value.
subs r1, #1
bne LOOP_MAX_ABS_VALUE_W32
END_MAX_ABS_VALUE_W32:
mvn r0, #0x80000000 @ Guard against the case for 0x80000000.
cmp r2, r0
movcc r0, r2 @ r0 = min(r2, 0x7FFFFFFF), unsigned.
EXIT:
bx lr
.fnend
@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
@
@ Returns the largest signed 16-bit sample in |vector|.
@ In:  r0 = vector, r1 = length (number of samples).
@ Out: r0 = maximum sample, sign-extended to 32 bits,
@           or -32768 when vector == NULL or length <= 0.
@ Scratch: r2 (running signed maximum), r3, q12, q13, condition flags.
WebRtcSpl_MaxValueW16:
.fnstart
@ Seed the result with -32768 as a sign-extended word. A plain 0x8000 would
@ be +32768, which the signed compare in the scalar loop could never beat.
mov r2, #0x8000
sxth r2, r2
cmp r0, #0
beq END_MAX_VALUE_W16
cmp r1, #0
ble END_MAX_VALUE_W16
vmov.i16 q12, #0x8000 @ Every lane starts at -32768.
cmp r1, #8
blt LOOP_MAX_VALUE_W16
sub r1, #8 @ Bias the counter so "subs/bge" stops after the last full group of 8.
LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
vld1.16 {q13}, [r0]! @ Load 8 samples and advance the pointer.
subs r1, #8
vmax.s16 q12, q13
bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16
@ Find the maximum value in the Neon registers and move it to r2.
vmax.s16 d24, d25
vpmax.s16 d24, d24
vpmax.s16 d24, d24
adds r1, #8 @ Undo the bias: r1 = samples left over (0..7); sets Z for the beq below.
@ Sign-extend the lane so a negative maximum stays negative for the signed
@ compare in the scalar loop and for the returned value.
vmov.s16 r2, d24[0]
beq END_MAX_VALUE_W16
@ Scalar loop for the remaining samples (or for all of them if length < 8).
LOOP_MAX_VALUE_W16:
ldrsh r3, [r0], #2
cmp r2, r3
movlt r2, r3
subs r1, #1
bne LOOP_MAX_VALUE_W16
END_MAX_VALUE_W16:
mov r0, r2
bx lr
.fnend
@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
@
@ Returns the largest signed 32-bit sample in |vector|.
@ In:  r0 = vector, r1 = length (number of samples).
@ Out: r0 = maximum sample, or 0x80000000 when vector == NULL or length <= 0.
@ Scratch: r2 (running signed maximum), r3, q11-q14, condition flags.
WebRtcSpl_MaxValueW32:
.fnstart
mov r2, #0x80000000 @ Initialize the return value.
cmp r0, #0
beq END_MAX_VALUE_W32
cmp r1, #0
ble END_MAX_VALUE_W32
@ Two accumulators, every lane seeded with the smallest 32-bit value.
vmov.i32 q11, #0x80000000
vmov.i32 q12, #0x80000000
cmp r1, #8
blt LOOP_MAX_VALUE_W32
sub r1, #8 @ Counter for loops
@ Each pass consumes 8 samples; the loop ends once the biased counter
@ goes negative.
LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
vld1.32 {q13, q14}, [r0]!
subs r1, #8
vmax.s32 q11, q13
vmax.s32 q12, q14
bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
@ Find the maximum value in the Neon registers and move it to r2.
vmax.s32 q12, q11
vpmax.s32 d24, d25
vpmax.s32 d24, d24
@ Undo the bias: r1 = samples left over. The Z flag set here is consumed by
@ the beq two instructions later, so nothing in between may alter the flags.
adds r1, #8
vmov.s32 r2, d24[0]
beq END_MAX_VALUE_W32
@ Scalar loop for the remaining samples (or for all of them if length < 8).
LOOP_MAX_VALUE_W32:
ldr r3, [r0], #4
cmp r2, r3
movlt r2, r3
subs r1, #1
bne LOOP_MAX_VALUE_W32
END_MAX_VALUE_W32:
mov r0, r2
bx lr
.fnend
@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
@
@ Returns the smallest signed 16-bit sample in |vector|.
@ In:  r0 = vector, r1 = length (number of samples).
@ Out: r0 = minimum sample, or 0x7FFF when vector == NULL or length <= 0.
@ Scratch: r2 (running signed minimum), r3, q12, q13, condition flags.
WebRtcSpl_MinValueW16:
.fnstart
movw r2, #0x7FFF @ Initialize the return value.
cmp r0, #0
beq END_MIN_VALUE_W16
cmp r1, #0
ble END_MIN_VALUE_W16
vmov.i16 q12, #0x7FFF @ Every lane starts at the largest 16-bit value.
cmp r1, #8
blt LOOP_MIN_VALUE_W16
sub r1, #8 @ Counter for loops
@ Each pass consumes 8 samples; the loop ends once the biased counter
@ goes negative.
LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
vld1.16 {q13}, [r0]!
subs r1, #8
vmin.s16 q12, q13
bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
@ Find the minimum value in the Neon registers and move it to r2.
vmin.s16 d24, d25
vpmin.s16 d24, d24
vpmin.s16 d24, d24
@ Undo the bias: r1 = samples left over. The Z flag set here is consumed by
@ the beq below; the vmov and sxth in between leave the flags untouched.
adds r1, #8
vmov.s16 r2, d24[0]
sxth r2, r2 @ Keeps r2 a sign-extended 16-bit value for the signed compares below.
beq END_MIN_VALUE_W16
@ Scalar loop for the remaining samples (or for all of them if length < 8).
LOOP_MIN_VALUE_W16:
ldrsh r3, [r0], #2
cmp r2, r3
movge r2, r3
subs r1, #1
bne LOOP_MIN_VALUE_W16
END_MIN_VALUE_W16:
mov r0, r2
bx lr
.fnend
@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
@
@ Returns the smallest signed 32-bit sample in |vector|.
@ In:  r0 = vector, r1 = length (number of samples).
@ Out: r0 = minimum sample, or 0x7FFFFFFF when vector == NULL or length <= 0.
@ Scratch: r2 (running signed minimum), r3, q11-q14, condition flags.
WebRtcSpl_MinValueW32:
.fnstart
mov r2, #0x7FFFFFFF @ Initialize the return value.
cmp r0, #0
beq END_MIN_VALUE_W32
cmp r1, #0
ble END_MIN_VALUE_W32
@ Two accumulators, every lane seeded with the largest 32-bit value from r2.
vdup.32 q11, r2
vdup.32 q12, r2
cmp r1, #8
blt LOOP_MIN_VALUE_W32
sub r1, #8 @ Counter for loops
@ Each pass consumes 8 samples; the loop ends once the biased counter
@ goes negative.
LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
vld1.32 {q13, q14}, [r0]!
subs r1, #8
vmin.s32 q11, q13
vmin.s32 q12, q14
bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
@ Find the minimum value in the Neon registers and move it to r2.
vmin.s32 q12, q11
vpmin.s32 d24, d25
vpmin.s32 d24, d24
@ Undo the bias: r1 = samples left over. The Z flag set here is consumed by
@ the beq two instructions later, so nothing in between may alter the flags.
adds r1, #8
vmov.s32 r2, d24[0]
beq END_MIN_VALUE_W32
@ Scalar loop for the remaining samples (or for all of them if length < 8).
LOOP_MIN_VALUE_W32:
ldr r3, [r0], #4
cmp r2, r3
movge r2, r3
subs r1, #1
bne LOOP_MIN_VALUE_W32
END_MIN_VALUE_W32:
mov r0, r2
bx lr
.fnend