MIPS optimizations for Signal Processing Library patch01

Review URL: https://webrtc-codereview.appspot.com/1028004
Patch from Ljubomir Papuga <lpapuga@mips.com>.

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3557 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org 2013-02-21 20:12:21 +00:00
parent 60f83131e4
commit 5140e24037
8 changed files with 790 additions and 4 deletions

View File

@ -5,8 +5,8 @@ Google Inc.
Mozilla Foundation
Intel Corporation
Vonage Holdings Corp.
MIPS Technologies
Ben Strong <bstrong@gmail.com>
Petar Jovanovic <petarj@mips.com>
Martin Storsjo <martin@martin.st>
Jie Mao <maojie0924@gmail.com>
Anil Kumar <an1kumar@gmail.com>

View File

@ -73,6 +73,12 @@
'libyuv_dir%': '<(DEPTH)/third_party/libyuv',
# Define MIPS architecture variant, MIPS DSP variant and MIPS FPU
# This may be subject to change in accordance to Chromium's MIPS flags
'mips_arch_variant%': 'mips32r1',
'mips_dsp_rev%': 0,
'mips_fpu%' : 1,
'conditions': [
['build_with_chromium==1', {
# Exclude pulse audio on Chromium since its prerequisites don't require
@ -199,6 +205,59 @@
}],
],
}],
['target_arch=="mipsel"', {
'defines': [
'MIPS32_LE',
],
'conditions': [
['mips_fpu==1', {
'defines': [
'MIPS_FPU_LE',
],
'cflags': [
'-mhard-float',
],
}, {
'cflags': [
'-msoft-float',
],
}],
['mips_arch_variant=="mips32r2"', {
'defines': [
'MIPS32_R2_LE',
],
'cflags': [
'-mips32r2',
],
'cflags_cc': [
'-mips32r2',
],
}],
['mips_dsp_rev==1', {
'defines': [
'MIPS_DSP_R1_LE',
],
'cflags': [
'-mdsp',
],
'cflags_cc': [
'-mdsp',
],
}],
['mips_dsp_rev==2', {
'defines': [
'MIPS_DSP_R1_LE',
'MIPS_DSP_R2_LE',
],
'cflags': [
'-mdspr2',
],
'cflags_cc': [
'-mdspr2',
],
}],
],
}],
['OS=="ios"', {
'defines': [
'WEBRTC_MAC',

View File

@ -147,8 +147,7 @@
((WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT((a), 18816, 7) & 0x00007fff))
#ifdef __cplusplus
extern "C"
{
extern "C" {
#endif
#define WEBRTC_SPL_MEMCPY_W8(v1, v2, length) \
@ -223,6 +222,9 @@ int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
#endif
#if defined(MIPS32_LE)
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, int length);
#endif
// Returns the largest absolute value in a signed 32-bit vector.
//
@ -238,6 +240,9 @@ int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
#endif
#if defined(MIPS_DSP_R1_LE)
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, int length);
#endif
// Returns the maximum value of a 16-bit vector.
//
@ -255,6 +260,9 @@ int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
#endif
#if defined(MIPS32_LE)
int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, int length);
#endif
// Returns the maximum value of a 32-bit vector.
//
@ -272,6 +280,9 @@ int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
#endif
#if defined(MIPS32_LE)
int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, int length);
#endif
// Returns the minimum value of a 16-bit vector.
//
@ -289,6 +300,9 @@ int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
#endif
#if defined(MIPS32_LE)
int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, int length);
#endif
// Returns the minimum value of a 32-bit vector.
//
@ -306,6 +320,9 @@ int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
#endif
#if defined(MIPS32_LE)
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, int length);
#endif
// Returns the vector index to the largest absolute value of a 16-bit vector.
//

View File

@ -0,0 +1,386 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of function
* WebRtcSpl_MaxAbsValueW16()
*
* The description header can be found in signal_processing_library.h.
*
*/
#include "signal_processing_library.h"
// Maximum absolute value of word16 vector.
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, int length) {
WebRtc_Word32 totMax = 0;
WebRtc_Word32 tmp32_0, tmp32_1, tmp32_2, tmp32_3;
int i, loop_size;
if (vector == NULL || length <= 0) {
return -1;
}
#if defined(MIPS_DSP_R1)
const WebRtc_Word32* tmpvec32 = (WebRtc_Word32*)vector;
loop_size = length >> 4;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lw %[tmp32_0], 0(%[tmpvec32]) \n\t"
"lw %[tmp32_1], 4(%[tmpvec32]) \n\t"
"lw %[tmp32_2], 8(%[tmpvec32]) \n\t"
"lw %[tmp32_3], 12(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"lw %[tmp32_0], 16(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"lw %[tmp32_1], 20(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"lw %[tmp32_2], 24(%[tmpvec32]) \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"lw %[tmp32_3], 28(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"addiu %[tmpvec32], %[tmpvec32], 32 \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
[totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
:
: "memory"
);
}
__asm__ volatile (
"rotr %[tmp32_0], %[totMax], 16 \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"packrl.ph %[totMax], $0, %[totMax] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
:
);
loop_size = length & 0xf;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvec32]) \n\t"
"addiu %[tmpvec32], %[tmpvec32], 2 \n\t"
"absq_s.w %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
:
: "memory"
);
}
#else // #if defined(MIPS_DSP_R1)
WebRtc_Word32 v16MaxMax = WEBRTC_SPL_WORD16_MAX;
WebRtc_Word32 r, r1, r2, r3;
const WebRtc_Word16* tmpvector = vector;
loop_size = length >> 4;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"lh %[tmp32_1], 2(%[tmpvector]) \n\t"
"lh %[tmp32_2], 4(%[tmpvector]) \n\t"
"lh %[tmp32_3], 6(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 8(%[tmpvector]) \n\t"
"lh %[tmp32_1], 10(%[tmpvector]) \n\t"
"lh %[tmp32_2], 12(%[tmpvector]) \n\t"
"lh %[tmp32_3], 14(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 16(%[tmpvector]) \n\t"
"lh %[tmp32_1], 18(%[tmpvector]) \n\t"
"lh %[tmp32_2], 20(%[tmpvector]) \n\t"
"lh %[tmp32_3], 22(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 24(%[tmpvector]) \n\t"
"lh %[tmp32_1], 26(%[tmpvector]) \n\t"
"lh %[tmp32_2], 28(%[tmpvector]) \n\t"
"lh %[tmp32_3], 30(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"addiu %[tmpvector], %[tmpvector], 32 \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
[totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
:
: "memory"
);
}
loop_size = length & 0xf;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"addiu %[tmpvector], %[tmpvector], 2 \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
:
: "memory"
);
}
__asm__ volatile (
"slt %[r], %[v16MaxMax], %[totMax] \n\t"
"movn %[totMax], %[v16MaxMax], %[r] \n\t"
: [totMax] "+r" (totMax), [r] "=&r" (r)
: [v16MaxMax] "r" (v16MaxMax)
);
#endif // #if defined(MIPS_DSP_R1)
return (int16_t)totMax;
}
#if defined(MIPS_DSP_R1_LE)
// Maximum absolute value of word32 vector. Version for MIPS platform.
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, int length) {
// Use uint32_t for the local variables, to accommodate the return value
// of abs(0x80000000), which is 0x80000000.
uint32_t absolute = 0, maximum = 0;
int tmp1 = 0, max_value = 0x7fffffff;
if (vector == NULL || length <= 0) {
return -1;
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lw %[absolute], 0(%[vector]) \n\t"
"absq_s.w %[absolute], %[absolute] \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[absolute] \n\t"
"movn %[maximum], %[absolute], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
"slt %[tmp1], %[max_value], %[maximum] \n\t"
"movn %[maximum], %[max_value], %[tmp1] \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute)
: [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value)
: "memory"
);
return (int32_t)maximum;
}
#endif // #if defined(MIPS_DSP_R1_LE)
// Maximum value of word16 vector. Version for MIPS platform.
int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, int length) {
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
int tmp1;
int16_t value;
if (vector == NULL || length <= 0) {
return maximum;
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lh %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[value] \n\t"
"movn %[maximum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
return maximum;
}
// Maximum value of word32 vector. Version for MIPS platform.
int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, int length) {
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
int tmp1, value;
if (vector == NULL || length <= 0) {
return maximum;
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lw %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[value] \n\t"
"movn %[maximum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
return maximum;
}
// Minimum value of word16 vector. Version for MIPS platform.
int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, int length) {
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
int tmp1;
int16_t value;
if (vector == NULL || length <= 0) {
return minimum;
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lh %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[value], %[minimum] \n\t"
"movn %[minimum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
return minimum;
}
// Minimum value of word32 vector. Version for MIPS platform.
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, int length) {
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
int tmp1, value;
if (vector == NULL || length <= 0) {
return minimum;
}
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lw %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[value], %[minimum] \n\t"
"movn %[minimum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
return minimum;
}

View File

@ -66,6 +66,7 @@ static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255};
// decimator
#if !defined(MIPS32_LE)
void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len,
WebRtc_Word16* out, WebRtc_Word32* filtState) {
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
@ -121,6 +122,7 @@ void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len,
filtState[6] = state6;
filtState[7] = state7;
}
#endif // #if defined(MIPS32_LE)
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len,

View File

@ -0,0 +1,291 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling by two functions.
* The description header can be found in signal_processing_library.h
*
*/
#if defined(MIPS32_LE)
#include "signal_processing_library.h"
// allpass filter coefficients.
static const WebRtc_UWord16 kResampleAllpass1[3] = {3284, 24441, 49528};
static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255};
// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
// decimator
void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in,
const WebRtc_Word16 len,
WebRtc_Word16* out,
WebRtc_Word32* filtState) {
WebRtc_Word32 out32;
WebRtc_Word16 i, len1;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
#if defined(MIPS_DSP_R2_LE)
WebRtc_Word32 k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;
k1Res0= 3284;
k1Res1= 24441;
k1Res2= 49528;
k2Res0= 12199;
k2Res1= 37471;
k2Res2= 60255;
len1 = (len >> 1);
const WebRtc_Word32* inw = (WebRtc_Word32*)in;
WebRtc_Word32 tmp11, tmp12, tmp21, tmp22;
WebRtc_Word32 in322, in321;
WebRtc_Word32 diff1, diff2;
for (i = len1; i > 0; i--) {
__asm__ volatile (
"lh %[in321], 0(%[inw]) \n\t"
"lh %[in322], 2(%[inw]) \n\t"
"sll %[in321], %[in321], 10 \n\t"
"sll %[in322], %[in322], 10 \n\t"
"addiu %[inw], %[inw], 4 \n\t"
"subu %[diff1], %[in321], %[state1] \n\t"
"subu %[diff2], %[in322], %[state5] \n\t"
: [in322] "=&r" (in322), [in321] "=&r" (in321),
[diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
: [state1] "r" (state1), [state5] "r" (state5)
: "memory"
);
__asm__ volatile (
"mult $ac0, %[diff1], %[k2Res0] \n\t"
"mult $ac1, %[diff2], %[k1Res0] \n\t"
"extr.w %[tmp11], $ac0, 16 \n\t"
"extr.w %[tmp12], $ac1, 16 \n\t"
"addu %[tmp11], %[state0], %[tmp11] \n\t"
"addu %[tmp12], %[state4], %[tmp12] \n\t"
"addiu %[state0], %[in321], 0 \n\t"
"addiu %[state4], %[in322], 0 \n\t"
"subu %[diff1], %[tmp11], %[state2] \n\t"
"subu %[diff2], %[tmp12], %[state6] \n\t"
"mult $ac0, %[diff1], %[k2Res1] \n\t"
"mult $ac1, %[diff2], %[k1Res1] \n\t"
"extr.w %[tmp21], $ac0, 16 \n\t"
"extr.w %[tmp22], $ac1, 16 \n\t"
"addu %[tmp21], %[state1], %[tmp21] \n\t"
"addu %[tmp22], %[state5], %[tmp22] \n\t"
"addiu %[state1], %[tmp11], 0 \n\t"
"addiu %[state5], %[tmp12], 0 \n\t"
: [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
[tmp11] "=&r" (tmp11), [state0] "+r" (state0),
[state1] "+r" (state1),
[state2] "+r" (state2),
[state4] "+r" (state4), [tmp12] "=&r" (tmp12),
[state6] "+r" (state6), [state5] "+r" (state5)
: [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
[diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
[in321] "r" (in321), [k1Res0] "r" (k1Res0)
: "hi", "lo", "$ac1hi", "$ac1lo"
);
// upper allpass filter
__asm__ volatile (
"subu %[diff1], %[tmp21], %[state3] \n\t"
"subu %[diff2], %[tmp22], %[state7] \n\t"
"mult $ac0, %[diff1], %[k2Res2] \n\t"
"mult $ac1, %[diff2], %[k1Res2] \n\t"
"extr.w %[state3], $ac0, 16 \n\t"
"extr.w %[state7], $ac1, 16 \n\t"
"addu %[state3], %[state2], %[state3] \n\t"
"addu %[state7], %[state6], %[state7] \n\t"
"addiu %[state2], %[tmp21], 0 \n\t"
"addiu %[state6], %[tmp22], 0 \n\t"
// add two allpass outputs, divide by two and round
"addu %[out32], %[state3], %[state7] \n\t"
"addiu %[out32], %[out32], 1024 \n\t"
"sra %[out32], %[out32], 11 \n\t"
: [state3] "+r" (state3), [state6] "+r" (state6),
[state2] "+r" (state2), [diff2] "=&r" (diff2),
[out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
: [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
[k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
: "hi", "lo", "$ac1hi", "$ac1lo"
);
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
#else // #if defined(MIPS_DSP_R2_LE)
WebRtc_Word32 tmp1, tmp2, diff;
WebRtc_Word32 in32;
len1 = (len >> 1)/4;
for (i = len1; i > 0; i--) {
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
#endif // #if defined(MIPS_DSP_R2_LE)
__asm__ volatile (
"sw %[state0], 0(%[filtState]) \n\t"
"sw %[state1], 4(%[filtState]) \n\t"
"sw %[state2], 8(%[filtState]) \n\t"
"sw %[state3], 12(%[filtState]) \n\t"
"sw %[state4], 16(%[filtState]) \n\t"
"sw %[state5], 20(%[filtState]) \n\t"
"sw %[state6], 24(%[filtState]) \n\t"
"sw %[state7], 28(%[filtState]) \n\t"
:
: [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
[state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
[state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
: "memory"
);
}
#endif // #if defined(MIPS32_LE)

View File

@ -84,6 +84,12 @@
}],
],
}],
['target_arch=="mipsel"', {
'sources': [
'min_max_operations_mips.c',
'resample_by_2_mips.c',
],
}],
],
# Ignore warning on shift operator promotion.
'msvs_disabled_warnings': [ 4334, ],

View File

@ -31,7 +31,8 @@ ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
RealForwardFFT WebRtcSpl_RealForwardFFT;
RealInverseFFT WebRtcSpl_RealInverseFFT;
#if defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON)
#if (defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON)) && \
!defined(MIPS32_LE)
/* Initialize function pointers to the generic C version. */
static void InitPointersToC() {
WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
@ -67,6 +68,28 @@ static void InitPointersToNeon() {
}
#endif
#if defined(MIPS32_LE)
/* Initialize function pointers to the MIPS version. */
static void InitPointersToMIPS() {
WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
WebRtcSpl_ScaleAndAddVectorsWithRound =
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
#if defined(MIPS_DSP_R1_LE)
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
#else
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
#endif
}
#endif
static void InitFunctionPointers(void) {
#if defined(WEBRTC_DETECT_ARM_NEON)
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
@ -76,6 +99,8 @@ static void InitFunctionPointers(void) {
}
#elif defined(WEBRTC_ARCH_ARM_NEON)
InitPointersToNeon();
#elif defined(MIPS32_LE)
InitPointersToMIPS();
#else
InitPointersToC();
#endif /* WEBRTC_DETECT_ARM_NEON */