Optimizations on several SPL min max operations in ARM, and refactoring in C.
Touched C and assembly functions are tested with a new unit test which is not in the code base yet. Review URL: https://webrtc-codereview.appspot.com/428004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1974 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
f3bbc3e5b3
commit
95c3d408f5
@ -34,6 +34,8 @@
|
|||||||
#define WEBRTC_SPL_MAX_SEED_USED 0x80000000L
|
#define WEBRTC_SPL_MAX_SEED_USED 0x80000000L
|
||||||
#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value
|
#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value
|
||||||
#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value
|
#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value
|
||||||
|
// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
|
||||||
|
// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
|
||||||
#define WEBRTC_SPL_ABS_W16(a) \
|
#define WEBRTC_SPL_ABS_W16(a) \
|
||||||
(((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
|
(((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
|
||||||
#define WEBRTC_SPL_ABS_W32(a) \
|
#define WEBRTC_SPL_ABS_W32(a) \
|
||||||
@ -202,41 +204,130 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
|
|||||||
WebRtc_Word16 vector_length);
|
WebRtc_Word16 vector_length);
|
||||||
// End: Copy and set operations.
|
// End: Copy and set operations.
|
||||||
|
|
||||||
|
|
||||||
// Minimum and maximum operations. Implementation in min_max_operations.c.
|
// Minimum and maximum operations. Implementation in min_max_operations.c.
|
||||||
|
|
||||||
// Returns the largest absolute value in a signed 16-bit vector.
|
// Returns the largest absolute value in a signed 16-bit vector.
|
||||||
//
|
//
|
||||||
// Input:
|
// Input:
|
||||||
// - vector : Input vector.
|
// - vector : 16-bit input vector.
|
||||||
// - length : Number of samples in vector.
|
// - length : Number of samples in vector.
|
||||||
//
|
//
|
||||||
// Return value : Maximum absolute value in vector.
|
// Return value : Maximum absolute value in vector;
|
||||||
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
|
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
|
// Returns the largest absolute value in a signed 32-bit vector.
|
||||||
WebRtc_Word16 length);
|
//
|
||||||
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
|
// Input:
|
||||||
WebRtc_Word16 length);
|
// - vector : 32-bit input vector.
|
||||||
WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector,
|
// - length : Number of samples in vector.
|
||||||
WebRtc_Word16 length);
|
//
|
||||||
WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector,
|
// Return value : Maximum absolute value in vector;
|
||||||
WebRtc_Word16 length);
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
|
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the maximum value of a 16-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 16-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Maximum sample value in |vector|.
|
||||||
|
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
|
||||||
|
// is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
|
||||||
|
// value and we can't catch errors purely based on it.
|
||||||
|
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the maximum value of a 32-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 32-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Maximum sample value in |vector|.
|
||||||
|
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
|
||||||
|
// is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
|
||||||
|
// value and we can't catch errors purely based on it.
|
||||||
|
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the minimum value of a 16-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 16-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Minimum sample value in |vector|.
|
||||||
|
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
|
||||||
|
// is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
|
||||||
|
// value and we can't catch errors purely based on it.
|
||||||
|
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the minimum value of a 32-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 32-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Minimum sample value in |vector|.
|
||||||
|
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
|
||||||
|
// is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
|
||||||
|
// value and we can't catch errors purely based on it.
|
||||||
|
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the vector index to the largest absolute value of a 16-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 16-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Index to the maximum absolute value in vector;
|
||||||
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
|
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the vector index to the maximum sample value of a 16-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 16-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Index to the maximum value in vector;
|
||||||
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
|
int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the vector index to the maximum sample value of a 32-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 32-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Index to the maximum value in vector;
|
||||||
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
|
int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the vector index to the minimum sample value of a 16-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 16-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Index to the minimum value in vector;
|
||||||
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
|
int WebRtcSpl_MinIndexW16(const int16_t* vector, int length);
|
||||||
|
|
||||||
|
// Returns the vector index to the minimum sample value of a 32-bit vector.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// - vector : 32-bit input vector.
|
||||||
|
// - length : Number of samples in vector.
|
||||||
|
//
|
||||||
|
// Return value : Index to the minimum value in vector;
|
||||||
|
// or -1, if (vector == NULL || length <= 0).
|
||||||
|
int WebRtcSpl_MinIndexW32(const int32_t* vector, int length);
|
||||||
|
|
||||||
WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector,
|
|
||||||
WebRtc_Word16 length);
|
|
||||||
WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector,
|
|
||||||
WebRtc_Word16 length);
|
|
||||||
WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector,
|
|
||||||
WebRtc_Word16 length);
|
|
||||||
WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector,
|
|
||||||
WebRtc_Word16 length);
|
|
||||||
WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector,
|
|
||||||
WebRtc_Word16 length);
|
|
||||||
WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector,
|
|
||||||
WebRtc_Word16 length);
|
|
||||||
// End: Minimum and maximum operations.
|
// End: Minimum and maximum operations.
|
||||||
|
|
||||||
|
|
||||||
// Vector scaling operations. Implementation in vector_scaling_operations.c.
|
// Vector scaling operations. Implementation in vector_scaling_operations.c.
|
||||||
// Description at bottom of file.
|
// Description at bottom of file.
|
||||||
void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
|
void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
|
||||||
@ -849,81 +940,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
|
|||||||
// Return value : Number of samples in vector
|
// Return value : Number of samples in vector
|
||||||
//
|
//
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_MinValueW16(...)
|
|
||||||
// WebRtcSpl_MinValueW32(...)
|
|
||||||
//
|
|
||||||
// Returns the minimum value of a vector
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - vector : Input vector
|
|
||||||
// - vector_length : Number of samples in vector
|
|
||||||
//
|
|
||||||
// Return value : Minimum sample value in vector
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_MaxValueW16(...)
|
|
||||||
// WebRtcSpl_MaxValueW32(...)
|
|
||||||
//
|
|
||||||
// Returns the maximum value of a vector
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - vector : Input vector
|
|
||||||
// - vector_length : Number of samples in vector
|
|
||||||
//
|
|
||||||
// Return value : Maximum sample value in vector
|
|
||||||
//
|
|
||||||
|
|
||||||
// WebRtcSpl_MaxAbsValueW32(...)
|
|
||||||
//
|
|
||||||
// Returns the largest absolute value of a vector
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - vector : Input vector
|
|
||||||
// - vector_length : Number of samples in vector
|
|
||||||
//
|
|
||||||
// Return value : Maximum absolute value in vector
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_MaxAbsIndexW16(...)
|
|
||||||
//
|
|
||||||
// Returns the vector index to the largest absolute value of a vector
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - vector : Input vector
|
|
||||||
// - vector_length : Number of samples in vector
|
|
||||||
//
|
|
||||||
// Return value : Index to maximum absolute value in vector
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_MinIndexW16(...)
|
|
||||||
// WebRtcSpl_MinIndexW32(...)
|
|
||||||
//
|
|
||||||
// Returns the vector index to the minimum sample value of a vector
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - vector : Input vector
|
|
||||||
// - vector_length : Number of samples in vector
|
|
||||||
//
|
|
||||||
// Return value : Index to minimum sample value in vector
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
|
||||||
// WebRtcSpl_MaxIndexW16(...)
|
|
||||||
// WebRtcSpl_MaxIndexW32(...)
|
|
||||||
//
|
|
||||||
// Returns the vector index to the maximum sample value of a vector
|
|
||||||
//
|
|
||||||
// Input:
|
|
||||||
// - vector : Input vector
|
|
||||||
// - vector_length : Number of samples in vector
|
|
||||||
//
|
|
||||||
// Return value : Index to maximum sample value in vector
|
|
||||||
//
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// WebRtcSpl_VectorBitShiftW16(...)
|
// WebRtcSpl_VectorBitShiftW16(...)
|
||||||
// WebRtcSpl_VectorBitShiftW32(...)
|
// WebRtcSpl_VectorBitShiftW32(...)
|
||||||
|
@ -11,32 +11,35 @@
|
|||||||
/*
|
/*
|
||||||
* This file contains the implementation of functions
|
* This file contains the implementation of functions
|
||||||
* WebRtcSpl_MaxAbsValueW16()
|
* WebRtcSpl_MaxAbsValueW16()
|
||||||
* WebRtcSpl_MaxAbsIndexW16()
|
|
||||||
* WebRtcSpl_MaxAbsValueW32()
|
* WebRtcSpl_MaxAbsValueW32()
|
||||||
* WebRtcSpl_MaxValueW16()
|
* WebRtcSpl_MaxValueW16()
|
||||||
* WebRtcSpl_MaxIndexW16()
|
|
||||||
* WebRtcSpl_MaxValueW32()
|
* WebRtcSpl_MaxValueW32()
|
||||||
* WebRtcSpl_MaxIndexW32()
|
|
||||||
* WebRtcSpl_MinValueW16()
|
* WebRtcSpl_MinValueW16()
|
||||||
* WebRtcSpl_MinIndexW16()
|
|
||||||
* WebRtcSpl_MinValueW32()
|
* WebRtcSpl_MinValueW32()
|
||||||
|
* WebRtcSpl_MaxAbsIndexW16()
|
||||||
|
* WebRtcSpl_MaxIndexW16()
|
||||||
|
* WebRtcSpl_MaxIndexW32()
|
||||||
|
* WebRtcSpl_MinIndexW16()
|
||||||
* WebRtcSpl_MinIndexW32()
|
* WebRtcSpl_MinIndexW32()
|
||||||
*
|
*
|
||||||
* The description header can be found in signal_processing_library.h.
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "signal_processing_library.h"
|
#include "signal_processing_library.h"
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
|
||||||
|
// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
|
||||||
|
|
||||||
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
|
||||||
|
|
||||||
// Maximum absolute value of word16 vector.
|
// Maximum absolute value of word16 vector.
|
||||||
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
|
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
|
||||||
int i = 0;
|
int i = 0, absolute = 0, maximum = 0;
|
||||||
int absolute = 0;
|
|
||||||
int maximum = -1; // Return -1 if length <= 0.
|
if (vector == NULL || length <= 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < length; i++) {
|
for (i = 0; i < length; i++) {
|
||||||
absolute = abs((int)vector[i]);
|
absolute = abs((int)vector[i]);
|
||||||
@ -54,214 +57,201 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
|
|||||||
return (int16_t)maximum;
|
return (int16_t)maximum;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Index of maximum absolute value in a word16 vector.
|
|
||||||
WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
|
|
||||||
{
|
|
||||||
WebRtc_Word16 tempMax;
|
|
||||||
WebRtc_Word16 absTemp;
|
|
||||||
WebRtc_Word16 tempMaxIndex = 0;
|
|
||||||
WebRtc_Word16 i = 0;
|
|
||||||
G_CONST WebRtc_Word16 *tmpvector = vector;
|
|
||||||
|
|
||||||
tempMax = WEBRTC_SPL_ABS_W16(*tmpvector);
|
|
||||||
tmpvector++;
|
|
||||||
for (i = 1; i < length; i++)
|
|
||||||
{
|
|
||||||
absTemp = WEBRTC_SPL_ABS_W16(*tmpvector);
|
|
||||||
tmpvector++;
|
|
||||||
if (absTemp > tempMax)
|
|
||||||
{
|
|
||||||
tempMax = absTemp;
|
|
||||||
tempMaxIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return tempMaxIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Maximum absolute value of word32 vector.
|
// Maximum absolute value of word32 vector.
|
||||||
WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
|
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
|
||||||
{
|
// Use uint for the local variables, to accommodate the value
|
||||||
WebRtc_UWord32 tempMax = 0;
|
// of abs(0x80000000).
|
||||||
WebRtc_UWord32 absVal;
|
|
||||||
WebRtc_Word32 retval;
|
|
||||||
int i;
|
|
||||||
G_CONST WebRtc_Word32 *tmpvector = vector;
|
|
||||||
|
|
||||||
for (i = 0; i < length; i++)
|
uint absolute = 0, maximum = 0;
|
||||||
{
|
int i = 0;
|
||||||
absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
|
|
||||||
if (absVal > tempMax)
|
if (vector == NULL || length <= 0) {
|
||||||
{
|
return -1;
|
||||||
tempMax = absVal;
|
}
|
||||||
}
|
|
||||||
tmpvector++;
|
for (i = 0; i < length; i++) {
|
||||||
|
absolute = abs((int)vector[i]);
|
||||||
|
if (absolute > maximum) {
|
||||||
|
maximum = absolute;
|
||||||
}
|
}
|
||||||
retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX));
|
}
|
||||||
return retval;
|
|
||||||
|
maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
|
||||||
|
|
||||||
|
return (int32_t)maximum;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maximum value of word16 vector.
|
// Maximum value of word16 vector.
|
||||||
#ifndef XSCALE_OPT
|
#ifndef XSCALE_OPT
|
||||||
WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
|
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
|
||||||
{
|
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
|
||||||
WebRtc_Word16 tempMax;
|
int i = 0;
|
||||||
WebRtc_Word16 i;
|
|
||||||
G_CONST WebRtc_Word16 *tmpvector = vector;
|
|
||||||
|
|
||||||
tempMax = *tmpvector++;
|
if (vector == NULL || length <= 0) {
|
||||||
for (i = 1; i < length; i++)
|
return maximum;
|
||||||
{
|
}
|
||||||
if (*tmpvector++ > tempMax)
|
|
||||||
tempMax = vector[i];
|
|
||||||
}
|
|
||||||
return tempMax;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Index of maximum value in a word16 vector.
|
for (i = 0; i < length; i++) {
|
||||||
WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
|
if (vector[i] > maximum)
|
||||||
{
|
maximum = vector[i];
|
||||||
WebRtc_Word16 tempMax;
|
}
|
||||||
WebRtc_Word16 tempMaxIndex = 0;
|
return maximum;
|
||||||
WebRtc_Word16 i = 0;
|
|
||||||
G_CONST WebRtc_Word16 *tmpvector = vector;
|
|
||||||
|
|
||||||
tempMax = *tmpvector++;
|
|
||||||
for (i = 1; i < length; i++)
|
|
||||||
{
|
|
||||||
if (*tmpvector++ > tempMax)
|
|
||||||
{
|
|
||||||
tempMax = vector[i];
|
|
||||||
tempMaxIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return tempMaxIndex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maximum value of word32 vector.
|
// Maximum value of word32 vector.
|
||||||
#ifndef XSCALE_OPT
|
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
|
||||||
WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
|
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
|
||||||
{
|
int i = 0;
|
||||||
WebRtc_Word32 tempMax;
|
|
||||||
WebRtc_Word16 i;
|
|
||||||
G_CONST WebRtc_Word32 *tmpvector = vector;
|
|
||||||
|
|
||||||
tempMax = *tmpvector++;
|
if (vector == NULL || length <= 0) {
|
||||||
for (i = 1; i < length; i++)
|
return maximum;
|
||||||
{
|
}
|
||||||
if (*tmpvector++ > tempMax)
|
|
||||||
tempMax = vector[i];
|
for (i = 0; i < length; i++) {
|
||||||
}
|
if (vector[i] > maximum)
|
||||||
return tempMax;
|
maximum = vector[i];
|
||||||
|
}
|
||||||
|
return maximum;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
#pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
|
||||||
#pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
|
#pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Index of maximum value in a word32 vector.
|
// Minimum value of word16 vector.
|
||||||
WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
|
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
|
||||||
{
|
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
|
||||||
WebRtc_Word32 tempMax;
|
int i = 0;
|
||||||
WebRtc_Word16 tempMaxIndex = 0;
|
|
||||||
WebRtc_Word16 i = 0;
|
|
||||||
G_CONST WebRtc_Word32 *tmpvector = vector;
|
|
||||||
|
|
||||||
tempMax = *tmpvector++;
|
if (vector == NULL || length <= 0) {
|
||||||
for (i = 1; i < length; i++)
|
return minimum;
|
||||||
{
|
}
|
||||||
if (*tmpvector++ > tempMax)
|
|
||||||
{
|
for (i = 0; i < length; i++) {
|
||||||
tempMax = vector[i];
|
if (vector[i] < minimum)
|
||||||
tempMaxIndex = i;
|
minimum = vector[i];
|
||||||
}
|
}
|
||||||
}
|
return minimum;
|
||||||
return tempMaxIndex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Minimum value of word16 vector.
|
// Minimum value of word32 vector.
|
||||||
WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
|
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
|
||||||
{
|
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
|
||||||
WebRtc_Word16 tempMin;
|
int i = 0;
|
||||||
WebRtc_Word16 i;
|
|
||||||
G_CONST WebRtc_Word16 *tmpvector = vector;
|
|
||||||
|
|
||||||
// Find the minimum value
|
if (vector == NULL || length <= 0) {
|
||||||
tempMin = *tmpvector++;
|
return minimum;
|
||||||
for (i = 1; i < length; i++)
|
}
|
||||||
{
|
|
||||||
if (*tmpvector++ < tempMin)
|
for (i = 0; i < length; i++) {
|
||||||
tempMin = (vector[i]);
|
if (vector[i] < minimum)
|
||||||
|
minimum = vector[i];
|
||||||
|
}
|
||||||
|
return minimum;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Index of maximum absolute value in a word16 vector.
|
||||||
|
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
|
||||||
|
// Use type int for local variables, to accommodate the value of abs(-32768).
|
||||||
|
|
||||||
|
int i = 0, absolute = 0, maximum = 0, index = 0;
|
||||||
|
|
||||||
|
if (vector == NULL || length <= 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < length; i++) {
|
||||||
|
absolute = abs((int)vector[i]);
|
||||||
|
|
||||||
|
if (absolute > maximum) {
|
||||||
|
maximum = absolute;
|
||||||
|
index = i;
|
||||||
}
|
}
|
||||||
return tempMin;
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index of maximum value in a word16 vector.
|
||||||
|
int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) {
|
||||||
|
int i = 0, index = 0;
|
||||||
|
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
|
||||||
|
|
||||||
|
if (vector == NULL || length <= 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < length; i++) {
|
||||||
|
if (vector[i] > maximum) {
|
||||||
|
maximum = vector[i];
|
||||||
|
index = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index of maximum value in a word32 vector.
|
||||||
|
int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) {
|
||||||
|
int i = 0, index = 0;
|
||||||
|
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
|
||||||
|
|
||||||
|
if (vector == NULL || length <= 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < length; i++) {
|
||||||
|
if (vector[i] > maximum) {
|
||||||
|
maximum = vector[i];
|
||||||
|
index = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index of minimum value in a word16 vector.
|
// Index of minimum value in a word16 vector.
|
||||||
#ifndef XSCALE_OPT
|
#ifndef XSCALE_OPT
|
||||||
WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
|
int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) {
|
||||||
{
|
int i = 0, index = 0;
|
||||||
WebRtc_Word16 tempMin;
|
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
|
||||||
WebRtc_Word16 tempMinIndex = 0;
|
|
||||||
WebRtc_Word16 i = 0;
|
|
||||||
G_CONST WebRtc_Word16* tmpvector = vector;
|
|
||||||
|
|
||||||
// Find index of smallest value
|
if (vector == NULL || length <= 0) {
|
||||||
tempMin = *tmpvector++;
|
return -1;
|
||||||
for (i = 1; i < length; i++)
|
}
|
||||||
{
|
|
||||||
if (*tmpvector++ < tempMin)
|
for (i = 0; i < length; i++) {
|
||||||
{
|
if (vector[i] < minimum) {
|
||||||
tempMin = vector[i];
|
minimum = vector[i];
|
||||||
tempMinIndex = i;
|
index = i;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return tempMinIndex;
|
}
|
||||||
}
|
|
||||||
#else
|
|
||||||
#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Minimum value of word32 vector.
|
return index;
|
||||||
WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
|
|
||||||
{
|
|
||||||
WebRtc_Word32 tempMin;
|
|
||||||
WebRtc_Word16 i;
|
|
||||||
G_CONST WebRtc_Word32 *tmpvector = vector;
|
|
||||||
|
|
||||||
// Find the minimum value
|
|
||||||
tempMin = *tmpvector++;
|
|
||||||
for (i = 1; i < length; i++)
|
|
||||||
{
|
|
||||||
if (*tmpvector++ < tempMin)
|
|
||||||
tempMin = (vector[i]);
|
|
||||||
}
|
|
||||||
return tempMin;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index of minimum value in a word32 vector.
|
// Index of minimum value in a word32 vector.
|
||||||
#ifndef XSCALE_OPT
|
int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) {
|
||||||
WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
|
int i = 0, index = 0;
|
||||||
{
|
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
|
||||||
WebRtc_Word32 tempMin;
|
|
||||||
WebRtc_Word16 tempMinIndex = 0;
|
|
||||||
WebRtc_Word16 i = 0;
|
|
||||||
G_CONST WebRtc_Word32 *tmpvector = vector;
|
|
||||||
|
|
||||||
// Find index of smallest value
|
if (vector == NULL || length <= 0) {
|
||||||
tempMin = *tmpvector++;
|
return -1;
|
||||||
for (i = 1; i < length; i++)
|
}
|
||||||
{
|
|
||||||
if (*tmpvector++ < tempMin)
|
for (i = 0; i < length; i++) {
|
||||||
{
|
if (vector[i] < minimum) {
|
||||||
tempMin = vector[i];
|
minimum = vector[i];
|
||||||
tempMinIndex = i;
|
index = i;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return tempMinIndex;
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
|
||||||
#pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
|
#pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
|
||||||
#endif
|
#endif
|
||||||
|
@ -18,50 +18,288 @@
|
|||||||
.arch armv7-a
|
.arch armv7-a
|
||||||
.fpu neon
|
.fpu neon
|
||||||
.global WebRtcSpl_MaxAbsValueW16
|
.global WebRtcSpl_MaxAbsValueW16
|
||||||
|
.global WebRtcSpl_MaxAbsValueW32
|
||||||
|
.global WebRtcSpl_MaxValueW16
|
||||||
|
.global WebRtcSpl_MaxValueW32
|
||||||
|
.global WebRtcSpl_MinValueW16
|
||||||
|
.global WebRtcSpl_MinValueW32
|
||||||
.align 2
|
.align 2
|
||||||
|
|
||||||
|
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
|
||||||
WebRtcSpl_MaxAbsValueW16:
|
WebRtcSpl_MaxAbsValueW16:
|
||||||
.fnstart
|
.fnstart
|
||||||
|
|
||||||
|
mov r2, #-1 @ Initialize the return value.
|
||||||
|
cmp r0, #0
|
||||||
|
beq END_MAX_ABS_VALUE_W16
|
||||||
|
cmp r1, #0
|
||||||
|
ble END_MAX_ABS_VALUE_W16
|
||||||
|
|
||||||
|
cmp r1, #8
|
||||||
|
blt LOOP_MAX_ABS_VALUE_W16
|
||||||
|
|
||||||
vmov.i16 q12, #0
|
vmov.i16 q12, #0
|
||||||
mov r2, #-1 @ Return value for the maximum.
|
sub r1, #8 @ Counter for loops
|
||||||
cmp r1, #0 @ length
|
|
||||||
ble END @ Return -1 if length <= 0.
|
|
||||||
cmp r1, #7
|
|
||||||
ble LOOP_NO_UNROLLING
|
|
||||||
|
|
||||||
lsr r3, r1, #3
|
LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
|
||||||
lsl r3, #3 @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.
|
vld1.16 {q13}, [r0]!
|
||||||
sub r1, r3 @ Counter for LOOP_NO_UNROLLING: length % 8.
|
subs r1, #8
|
||||||
|
|
||||||
LOOP_UNROLLED_BY_8:
|
|
||||||
vld1.16 {d26, d27}, [r0]!
|
|
||||||
subs r3, #8
|
|
||||||
vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768.
|
vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768.
|
||||||
vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768.
|
vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768.
|
||||||
bne LOOP_UNROLLED_BY_8
|
bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16
|
||||||
|
|
||||||
@ Find the maximum value in the Neon registers and move it to r2.
|
@ Find the maximum value in the Neon registers and move it to r2.
|
||||||
vmax.u16 d24, d25
|
vmax.u16 d24, d25
|
||||||
vpmax.u16 d24, d24
|
vpmax.u16 d24, d24
|
||||||
vpmax.u16 d24, d24
|
vpmax.u16 d24, d24
|
||||||
cmp r1, #0
|
adds r1, #8
|
||||||
vmov.u16 r2, d24[0]
|
vmov.u16 r2, d24[0]
|
||||||
ble END
|
beq END_MAX_ABS_VALUE_W16
|
||||||
|
|
||||||
LOOP_NO_UNROLLING:
|
LOOP_MAX_ABS_VALUE_W16:
|
||||||
ldrsh r3, [r0], #2
|
ldrsh r3, [r0], #2
|
||||||
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
|
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
|
||||||
sub r12, r12, r3, asr #31
|
sub r12, r12, r3, asr #31
|
||||||
cmp r2, r12
|
cmp r2, r12
|
||||||
movlt r2, r12
|
movlt r2, r12
|
||||||
subs r1, #1
|
subs r1, #1
|
||||||
bne LOOP_NO_UNROLLING
|
bne LOOP_MAX_ABS_VALUE_W16
|
||||||
|
|
||||||
END:
|
END_MAX_ABS_VALUE_W16:
|
||||||
cmp r2, #0x8000 @ Guard against the case for -32768.
|
cmp r2, #0x8000 @ Guard against the case for -32768.
|
||||||
subeq r2, #1
|
subeq r2, #1
|
||||||
mov r0, r2
|
mov r0, r2
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
.fnend
|
.fnend
|
||||||
|
|
||||||
|
@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
|
||||||
|
WebRtcSpl_MaxAbsValueW32:
|
||||||
|
.fnstart
|
||||||
|
|
||||||
|
cmp r0, #0
|
||||||
|
moveq r0, #-1
|
||||||
|
beq EXIT @ Return -1 for a NULL pointer.
|
||||||
|
cmp r1, #0 @ length
|
||||||
|
movle r0, #-1
|
||||||
|
ble EXIT @ Return -1 if length <= 0.
|
||||||
|
|
||||||
|
vmov.i32 q11, #0
|
||||||
|
vmov.i32 q12, #0
|
||||||
|
cmp r1, #8
|
||||||
|
blt LOOP_MAX_ABS_VALUE_W32
|
||||||
|
|
||||||
|
sub r1, #8 @ Counter for loops
|
||||||
|
|
||||||
|
LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
|
||||||
|
vld1.32 {q13, q14}, [r0]!
|
||||||
|
subs r1, #8 @ Counter for loops
|
||||||
|
vabs.s32 q13, q13 @ vabs doesn't change the value of 0x80000000.
|
||||||
|
vabs.s32 q14, q14
|
||||||
|
vmax.u32 q11, q13 @ Use u32 so we don't lose the value 0x80000000.
|
||||||
|
vmax.u32 q12, q14
|
||||||
|
bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
|
||||||
|
|
||||||
|
@ Find the maximum value in the Neon registers and move it to r2.
|
||||||
|
vmax.u32 q12, q11
|
||||||
|
vmax.u32 d24, d25
|
||||||
|
vpmax.u32 d24, d24
|
||||||
|
adds r1, #8
|
||||||
|
vmov.u32 r2, d24[0]
|
||||||
|
beq END_MAX_ABS_VALUE_W32
|
||||||
|
|
||||||
|
LOOP_MAX_ABS_VALUE_W32:
|
||||||
|
ldr r3, [r0], #4
|
||||||
|
eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
|
||||||
|
sub r12, r12, r3, asr #31
|
||||||
|
cmp r2, r12
|
||||||
|
movcc r2, r12
|
||||||
|
subs r1, #1
|
||||||
|
bne LOOP_MAX_ABS_VALUE_W32
|
||||||
|
|
||||||
|
END_MAX_ABS_VALUE_W32:
|
||||||
|
mvn r0, #0x80000000 @ Guard against the case for 0x80000000.
|
||||||
|
cmp r2, r0
|
||||||
|
movcc r0, r2
|
||||||
|
|
||||||
|
EXIT:
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
|
||||||
|
WebRtcSpl_MaxValueW16:
|
||||||
|
.fnstart
|
||||||
|
|
||||||
|
mov r2, #0x8000 @ Initialize the return value.
|
||||||
|
cmp r0, #0
|
||||||
|
beq END_MAX_VALUE_W16
|
||||||
|
cmp r1, #0
|
||||||
|
ble END_MAX_VALUE_W16
|
||||||
|
|
||||||
|
vmov.i16 q12, #0x8000
|
||||||
|
cmp r1, #8
|
||||||
|
blt LOOP_MAX_VALUE_W16
|
||||||
|
|
||||||
|
sub r1, #8 @ Counter for loops
|
||||||
|
|
||||||
|
LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
|
||||||
|
vld1.16 {q13}, [r0]!
|
||||||
|
subs r1, #8
|
||||||
|
vmax.s16 q12, q13
|
||||||
|
bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16
|
||||||
|
|
||||||
|
@ Find the maximum value in the Neon registers and move it to r2.
|
||||||
|
vmax.s16 d24, d25
|
||||||
|
vpmax.s16 d24, d24
|
||||||
|
vpmax.s16 d24, d24
|
||||||
|
adds r1, #8
|
||||||
|
vmov.u16 r2, d24[0]
|
||||||
|
beq END_MAX_VALUE_W16
|
||||||
|
|
||||||
|
LOOP_MAX_VALUE_W16:
|
||||||
|
ldrsh r3, [r0], #2
|
||||||
|
cmp r2, r3
|
||||||
|
movlt r2, r3
|
||||||
|
subs r1, #1
|
||||||
|
bne LOOP_MAX_VALUE_W16
|
||||||
|
|
||||||
|
END_MAX_VALUE_W16:
|
||||||
|
mov r0, r2
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
|
||||||
|
WebRtcSpl_MaxValueW32:
|
||||||
|
.fnstart
|
||||||
|
|
||||||
|
mov r2, #0x80000000 @ Initialize the return value.
|
||||||
|
cmp r0, #0
|
||||||
|
beq END_MAX_VALUE_W32
|
||||||
|
cmp r1, #0
|
||||||
|
ble END_MAX_VALUE_W32
|
||||||
|
|
||||||
|
vmov.i32 q11, #0x80000000
|
||||||
|
vmov.i32 q12, #0x80000000
|
||||||
|
cmp r1, #8
|
||||||
|
blt LOOP_MAX_VALUE_W32
|
||||||
|
|
||||||
|
sub r1, #8 @ Counter for loops
|
||||||
|
|
||||||
|
LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
|
||||||
|
vld1.32 {q13, q14}, [r0]!
|
||||||
|
subs r1, #8
|
||||||
|
vmax.s32 q11, q13
|
||||||
|
vmax.s32 q12, q14
|
||||||
|
bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
|
||||||
|
|
||||||
|
@ Find the maximum value in the Neon registers and move it to r2.
|
||||||
|
vmax.s32 q12, q11
|
||||||
|
vpmax.s32 d24, d25
|
||||||
|
vpmax.s32 d24, d24
|
||||||
|
adds r1, #8
|
||||||
|
vmov.s32 r2, d24[0]
|
||||||
|
beq END_MAX_VALUE_W32
|
||||||
|
|
||||||
|
LOOP_MAX_VALUE_W32:
|
||||||
|
ldr r3, [r0], #4
|
||||||
|
cmp r2, r3
|
||||||
|
movlt r2, r3
|
||||||
|
subs r1, #1
|
||||||
|
bne LOOP_MAX_VALUE_W32
|
||||||
|
|
||||||
|
END_MAX_VALUE_W32:
|
||||||
|
mov r0, r2
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
|
||||||
|
WebRtcSpl_MinValueW16:
|
||||||
|
.fnstart
|
||||||
|
|
||||||
|
movw r2, #0x7FFF @ Initialize the return value.
|
||||||
|
cmp r0, #0
|
||||||
|
beq END_MIN_VALUE_W16
|
||||||
|
cmp r1, #0
|
||||||
|
ble END_MIN_VALUE_W16
|
||||||
|
|
||||||
|
vmov.i16 q12, #0x7FFF
|
||||||
|
cmp r1, #8
|
||||||
|
blt LOOP_MIN_VALUE_W16
|
||||||
|
|
||||||
|
sub r1, #8 @ Counter for loops
|
||||||
|
|
||||||
|
LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
|
||||||
|
vld1.16 {q13}, [r0]!
|
||||||
|
subs r1, #8
|
||||||
|
vmin.s16 q12, q13
|
||||||
|
bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
|
||||||
|
|
||||||
|
@ Find the minimum value in the Neon registers and move it to r2.
|
||||||
|
vmin.s16 d24, d25
|
||||||
|
vpmin.s16 d24, d24
|
||||||
|
vpmin.s16 d24, d24
|
||||||
|
adds r1, #8
|
||||||
|
vmov.s16 r2, d24[0]
|
||||||
|
sxth r2, r2
|
||||||
|
beq END_MIN_VALUE_W16
|
||||||
|
|
||||||
|
LOOP_MIN_VALUE_W16:
|
||||||
|
ldrsh r3, [r0], #2
|
||||||
|
cmp r2, r3
|
||||||
|
movge r2, r3
|
||||||
|
subs r1, #1
|
||||||
|
bne LOOP_MIN_VALUE_W16
|
||||||
|
|
||||||
|
END_MIN_VALUE_W16:
|
||||||
|
mov r0, r2
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
|
||||||
|
WebRtcSpl_MinValueW32:
|
||||||
|
.fnstart
|
||||||
|
|
||||||
|
mov r2, #0x7FFFFFFF @ Initialize the return value.
|
||||||
|
cmp r0, #0
|
||||||
|
beq END_MIN_VALUE_W32
|
||||||
|
cmp r1, #0
|
||||||
|
ble END_MIN_VALUE_W32
|
||||||
|
|
||||||
|
vdup.32 q11, r2
|
||||||
|
vdup.32 q12, r2
|
||||||
|
cmp r1, #8
|
||||||
|
blt LOOP_MIN_VALUE_W32
|
||||||
|
|
||||||
|
sub r1, #8 @ Counter for loops
|
||||||
|
|
||||||
|
LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
|
||||||
|
vld1.32 {q13, q14}, [r0]!
|
||||||
|
subs r1, #8
|
||||||
|
vmin.s32 q11, q13
|
||||||
|
vmin.s32 q12, q14
|
||||||
|
bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
|
||||||
|
|
||||||
|
@ Find the minimum value in the Neon registers and move it to r2.
|
||||||
|
vmin.s32 q12, q11
|
||||||
|
vpmin.s32 d24, d25
|
||||||
|
vpmin.s32 d24, d24
|
||||||
|
adds r1, #8
|
||||||
|
vmov.s32 r2, d24[0]
|
||||||
|
beq END_MIN_VALUE_W32
|
||||||
|
|
||||||
|
LOOP_MIN_VALUE_W32:
|
||||||
|
ldr r3, [r0], #4
|
||||||
|
cmp r2, r3
|
||||||
|
movge r2, r3
|
||||||
|
subs r1, #1
|
||||||
|
bne LOOP_MIN_VALUE_W32
|
||||||
|
|
||||||
|
END_MIN_VALUE_W32:
|
||||||
|
mov r0, r2
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
Loading…
Reference in New Issue
Block a user