MIPS optimizations for the functions WebRtcSpl_SqrtFloor, WebRtcSpl_CrossCorrelation, WebRtcSpl_ScaleAndAddVectorsWithRound and the inline functions from signal_processing spl_inl.h file.
R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/1791004 Patch from Ljubomir Papuga <lpapuga@mips.com>. git-svn-id: http://webrtc.googlecode.com/svn/trunk@4779 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
5f1051631a
commit
8bf755d5c5
@ -116,17 +116,28 @@
|
||||
}],
|
||||
['target_arch=="mipsel"', {
|
||||
'sources': [
|
||||
'signal_processing/include/spl_inl_mips.h',
|
||||
'signal_processing/complex_bit_reverse_mips.c',
|
||||
'signal_processing/complex_fft_mips.c',
|
||||
'signal_processing/cross_correlation_mips.c',
|
||||
'signal_processing/downsample_fast_mips.c',
|
||||
'signal_processing/filter_ar_fast_q12_mips.c',
|
||||
'signal_processing/min_max_operations_mips.c',
|
||||
'signal_processing/resample_by_2_mips.c',
|
||||
'signal_processing/spl_sqrt_floor_mips.c',
|
||||
],
|
||||
'sources!': [
|
||||
'signal_processing/complex_bit_reverse.c',
|
||||
'signal_processing/complex_fft.c',
|
||||
'signal_processing/filter_ar_fast_q12.c',
|
||||
'signal_processing/spl_sqrt_floor.c',
|
||||
],
|
||||
'conditions': [
|
||||
['mips_dsp_rev>0', {
|
||||
'sources': [
|
||||
'signal_processing/vector_scaling_operations_mips.c',
|
||||
],
|
||||
}],
|
||||
],
|
||||
}],
|
||||
], # conditions
|
||||
|
104
webrtc/common_audio/signal_processing/cross_correlation_mips.c
Normal file
104
webrtc/common_audio/signal_processing/cross_correlation_mips.c
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
|
||||
const int16_t* seq1,
|
||||
const int16_t* seq2,
|
||||
int16_t dim_seq,
|
||||
int16_t dim_cross_correlation,
|
||||
int16_t right_shifts,
|
||||
int16_t step_seq2) {
|
||||
|
||||
int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
|
||||
int16_t *pseq2 = NULL;
|
||||
int16_t *pseq1 = NULL;
|
||||
int16_t *pseq1_0 = (int16_t*)&seq1[0];
|
||||
int16_t *pseq2_0 = (int16_t*)&seq2[0];
|
||||
int k = 0;
|
||||
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"sll %[step_seq2], %[step_seq2], 1 \n\t"
|
||||
"andi %[t0], %[dim_seq], 1 \n\t"
|
||||
"bgtz %[t0], 3f \n\t"
|
||||
" nop \n\t"
|
||||
"1: \n\t"
|
||||
"move %[pseq1], %[pseq1_0] \n\t"
|
||||
"move %[pseq2], %[pseq2_0] \n\t"
|
||||
"sra %[k], %[dim_seq], 1 \n\t"
|
||||
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
|
||||
"xor %[sum], %[sum], %[sum] \n\t"
|
||||
"2: \n\t"
|
||||
"lh %[t0], 0(%[pseq1]) \n\t"
|
||||
"lh %[t1], 0(%[pseq2]) \n\t"
|
||||
"lh %[t2], 2(%[pseq1]) \n\t"
|
||||
"lh %[t3], 2(%[pseq2]) \n\t"
|
||||
"mul %[t0], %[t0], %[t1] \n\t"
|
||||
"addiu %[k], %[k], -1 \n\t"
|
||||
"mul %[t2], %[t2], %[t3] \n\t"
|
||||
"addiu %[pseq1], %[pseq1], 4 \n\t"
|
||||
"addiu %[pseq2], %[pseq2], 4 \n\t"
|
||||
"srav %[t0], %[t0], %[right_shifts] \n\t"
|
||||
"addu %[sum], %[sum], %[t0] \n\t"
|
||||
"srav %[t2], %[t2], %[right_shifts] \n\t"
|
||||
"bgtz %[k], 2b \n\t"
|
||||
" addu %[sum], %[sum], %[t2] \n\t"
|
||||
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
|
||||
"sw %[sum], 0(%[cc]) \n\t"
|
||||
"bgtz %[dim_cc], 1b \n\t"
|
||||
" addiu %[cc], %[cc], 4 \n\t"
|
||||
"b 6f \n\t"
|
||||
" nop \n\t"
|
||||
"3: \n\t"
|
||||
"move %[pseq1], %[pseq1_0] \n\t"
|
||||
"move %[pseq2], %[pseq2_0] \n\t"
|
||||
"sra %[k], %[dim_seq], 1 \n\t"
|
||||
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
|
||||
"beqz %[k], 5f \n\t"
|
||||
" xor %[sum], %[sum], %[sum] \n\t"
|
||||
"4: \n\t"
|
||||
"lh %[t0], 0(%[pseq1]) \n\t"
|
||||
"lh %[t1], 0(%[pseq2]) \n\t"
|
||||
"lh %[t2], 2(%[pseq1]) \n\t"
|
||||
"lh %[t3], 2(%[pseq2]) \n\t"
|
||||
"mul %[t0], %[t0], %[t1] \n\t"
|
||||
"addiu %[k], %[k], -1 \n\t"
|
||||
"mul %[t2], %[t2], %[t3] \n\t"
|
||||
"addiu %[pseq1], %[pseq1], 4 \n\t"
|
||||
"addiu %[pseq2], %[pseq2], 4 \n\t"
|
||||
"srav %[t0], %[t0], %[right_shifts] \n\t"
|
||||
"addu %[sum], %[sum], %[t0] \n\t"
|
||||
"srav %[t2], %[t2], %[right_shifts] \n\t"
|
||||
"bgtz %[k], 4b \n\t"
|
||||
" addu %[sum], %[sum], %[t2] \n\t"
|
||||
"5: \n\t"
|
||||
"lh %[t0], 0(%[pseq1]) \n\t"
|
||||
"lh %[t1], 0(%[pseq2]) \n\t"
|
||||
"mul %[t0], %[t0], %[t1] \n\t"
|
||||
"srav %[t0], %[t0], %[right_shifts] \n\t"
|
||||
"addu %[sum], %[sum], %[t0] \n\t"
|
||||
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
|
||||
"sw %[sum], 0(%[cc]) \n\t"
|
||||
"bgtz %[dim_cc], 3b \n\t"
|
||||
" addiu %[cc], %[cc], 4 \n\t"
|
||||
"6: \n\t"
|
||||
".set pop \n\t"
|
||||
: [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
|
||||
[t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
|
||||
[pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
|
||||
[k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
|
||||
[cc] "+r" (cross_correlation)
|
||||
: [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
|
||||
: "hi", "lo", "memory"
|
||||
);
|
||||
}
|
@ -73,6 +73,8 @@
|
||||
|
||||
#ifndef WEBRTC_ARCH_ARM_V7
|
||||
// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h
|
||||
#ifndef MIPS32_LE
|
||||
// For MIPS platforms, these are inline functions in spl_inl_mips.h
|
||||
#define WEBRTC_SPL_MUL_16_16(a, b) \
|
||||
((int32_t) (((int16_t)(a)) * ((int16_t)(b))))
|
||||
#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
|
||||
@ -87,6 +89,7 @@
|
||||
(WEBRTC_SPL_MUL_16_32_RSFT16(( \
|
||||
(int16_t)((a32 & 0x0000FFFF) >> 1)), b32) >> 15)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \
|
||||
((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 5) \
|
||||
@ -456,6 +459,15 @@ int WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(const int16_t* in_vector1,
|
||||
int16_t* out_vector,
|
||||
int length);
|
||||
#endif
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
|
||||
int16_t in_vector1_scale,
|
||||
const int16_t* in_vector2,
|
||||
int16_t in_vector2_scale,
|
||||
int right_shifts,
|
||||
int16_t* out_vector,
|
||||
int length);
|
||||
#endif
|
||||
// End: Vector scaling operations.
|
||||
|
||||
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
|
||||
@ -627,6 +639,15 @@ void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
|
||||
int16_t right_shifts,
|
||||
int16_t step_seq2);
|
||||
#endif
|
||||
#if defined(MIPS32_LE)
|
||||
void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
|
||||
const int16_t* seq1,
|
||||
const int16_t* seq2,
|
||||
int16_t dim_seq,
|
||||
int16_t dim_cross_correlation,
|
||||
int16_t right_shifts,
|
||||
int16_t step_seq2);
|
||||
#endif
|
||||
|
||||
// Creates (the first half of) a Hanning window. Size must be at least 1 and
|
||||
// at most 512.
|
||||
|
@ -19,6 +19,11 @@
|
||||
#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
|
||||
#else
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
|
||||
#endif
|
||||
|
||||
#if !defined(MIPS_DSP_R1_LE)
|
||||
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
|
||||
int16_t out16 = (int16_t) value32;
|
||||
|
||||
@ -37,7 +42,9 @@ static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
|
||||
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
|
||||
return WebRtcSpl_SatW32ToW16((int32_t) var1 - (int32_t) var2);
|
||||
}
|
||||
#endif // #if !defined(MIPS_DSP_R1_LE)
|
||||
|
||||
#if !defined(MIPS32_LE)
|
||||
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
|
||||
int bits;
|
||||
|
||||
@ -121,11 +128,13 @@ static __inline int WebRtcSpl_NormW16(int16_t a) {
|
||||
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
|
||||
return (a * b + c);
|
||||
}
|
||||
#endif // #if !defined(MIPS32_LE)
|
||||
|
||||
#endif // WEBRTC_ARCH_ARM_V7
|
||||
|
||||
// The following functions have no optimized versions.
|
||||
// TODO(kma): Consider saturating add/sub instructions in X86 platform.
|
||||
#if !defined(MIPS_DSP_R1_LE)
|
||||
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_sum;
|
||||
|
||||
@ -163,5 +172,6 @@ static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
|
||||
return l_diff;
|
||||
}
|
||||
#endif // #if !defined(MIPS_DSP_R1_LE)
|
||||
|
||||
#endif // WEBRTC_SPL_SPL_INL_H_
|
||||
|
281
webrtc/common_audio/signal_processing/include/spl_inl_mips.h
Normal file
281
webrtc/common_audio/signal_processing/include/spl_inl_mips.h
Normal file
@ -0,0 +1,281 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
// This header file includes the inline functions in
|
||||
// the fixed-point signal processing library.
|
||||
|
||||
#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_
|
||||
#define WEBRTC_SPL_SPL_INL_MIPS_H_
|
||||
|
||||
static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a,
|
||||
int32_t b) {
|
||||
int32_t value32 = 0;
|
||||
int32_t a1 = 0, b1 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a1], %[a] \n\t"
|
||||
"seh %[b1], %[b] \n\t"
|
||||
#else
|
||||
"sll %[a1], %[a], 16 \n\t"
|
||||
"sll %[b1], %[b], 16 \n\t"
|
||||
"sra %[a1], %[a1], 16 \n\t"
|
||||
"sra %[b1], %[b1], 16 \n\t"
|
||||
#endif
|
||||
"mul %[value32], %[a1], %[b1] \n\t"
|
||||
: [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return value32;
|
||||
}
|
||||
|
||||
static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a,
|
||||
int32_t b) {
|
||||
int32_t value32 = 0, b1 = 0, b2 = 0;
|
||||
int32_t a1 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a1], %[a] \n\t"
|
||||
#else
|
||||
"sll %[a1], %[a], 16 \n\t"
|
||||
"sra %[a1], %[a1], 16 \n\t"
|
||||
#endif
|
||||
"andi %[b2], %[b], 0xFFFF \n\t"
|
||||
"sra %[b1], %[b], 16 \n\t"
|
||||
"sra %[b2], %[b2], 1 \n\t"
|
||||
"mul %[value32], %[a1], %[b1] \n\t"
|
||||
"mul %[b2], %[a1], %[b2] \n\t"
|
||||
"addiu %[b2], %[b2], 0x4000 \n\t"
|
||||
"sra %[b2], %[b2], 15 \n\t"
|
||||
"addu %[value32], %[value32], %[b2] \n\t"
|
||||
: [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2),
|
||||
[a1] "=&r" (a1)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return value32;
|
||||
}
|
||||
|
||||
// Combines two WEBRTC_SPL_MUL_16_32_RSFT16 partial products of |b|: one with
// the upper halfword of |a|, and one with the halved lower halfword of |a|
// whose result is shifted right by a further 15 bits.
static __inline int32_t WEBRTC_SPL_MUL_32_32_RSFT32BI(int32_t a,
                                                      int32_t b) {
  const int16_t a_low_halved = (int16_t)((a & 0x0000FFFF) >> 1);
  int32_t result = 0;

  // For 0 <= a <= 32767 the upper halfword of |a| is zero, so the upper
  // partial product is skipped and left at zero.
  if (a > 32767 || a < 0) {
    result = WEBRTC_SPL_MUL_16_32_RSFT16((int16_t)(a >> 16), b);
  }
  result += WEBRTC_SPL_MUL_16_32_RSFT16(a_low_halved, b) >> 15;

  return result;
}
|
||||
|
||||
static __inline int32_t WEBRTC_SPL_MUL_32_32_RSFT32(int16_t a,
|
||||
int16_t b,
|
||||
int32_t c) {
|
||||
int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
"sra %[tmp1], %[c], 16 \n\t"
|
||||
"andi %[tmp2], %[c], 0xFFFF \n\t"
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a], %[a] \n\t"
|
||||
"seh %[b], %[b] \n\t"
|
||||
#else
|
||||
"sll %[a], %[a], 16 \n\t"
|
||||
"sra %[a], %[a], 16 \n\t"
|
||||
"sll %[b], %[b], 16 \n\t"
|
||||
"sra %[b], %[b], 16 \n\t"
|
||||
#endif
|
||||
"sra %[tmp2], %[tmp2], 1 \n\t"
|
||||
"mul %[tmp3], %[a], %[tmp2] \n\t"
|
||||
"mul %[tmp4], %[b], %[tmp2] \n\t"
|
||||
"mul %[tmp2], %[a], %[tmp1] \n\t"
|
||||
"mul %[tmp1], %[b], %[tmp1] \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"shra_r.w %[tmp3], %[tmp3], 15 \n\t"
|
||||
"shra_r.w %[tmp4], %[tmp4], 15 \n\t"
|
||||
#else
|
||||
"addiu %[tmp3], %[tmp3], 0x4000 \n\t"
|
||||
"sra %[tmp3], %[tmp3], 15 \n\t"
|
||||
"addiu %[tmp4], %[tmp4], 0x4000 \n\t"
|
||||
"sra %[tmp4], %[tmp4], 15 \n\t"
|
||||
#endif
|
||||
"addu %[tmp3], %[tmp3], %[tmp2] \n\t"
|
||||
"addu %[tmp4], %[tmp4], %[tmp1] \n\t"
|
||||
"sra %[tmp4], %[tmp4], 16 \n\t"
|
||||
"addu %[tmp1], %[tmp3], %[tmp4] \n\t"
|
||||
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
|
||||
[tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
|
||||
[a] "+r" (a), [b] "+r" (b)
|
||||
: [c] "r" (c)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
|
||||
__asm __volatile(
|
||||
"shll_s.w %[value32], %[value32], 16 \n\t"
|
||||
"sra %[value32], %[value32], 16 \n\t"
|
||||
: [value32] "+r" (value32)
|
||||
:
|
||||
);
|
||||
int16_t out16 = (int16_t)value32;
|
||||
return out16;
|
||||
}
|
||||
|
||||
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
|
||||
int32_t value32 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
"addq_s.ph %[value32], %[a], %[b] \n\t"
|
||||
: [value32] "=r" (value32)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
);
|
||||
return (int16_t)value32;
|
||||
}
|
||||
|
||||
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_sum;
|
||||
|
||||
__asm __volatile(
|
||||
"addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t"
|
||||
: [l_sum] "=r" (l_sum)
|
||||
: [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
|
||||
);
|
||||
|
||||
return l_sum;
|
||||
}
|
||||
|
||||
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
|
||||
int32_t value32;
|
||||
|
||||
__asm __volatile(
|
||||
"subq_s.ph %[value32], %[var1], %[var2] \n\t"
|
||||
: [value32] "=r" (value32)
|
||||
: [var1] "r" (var1), [var2] "r" (var2)
|
||||
);
|
||||
|
||||
return (int16_t)value32;
|
||||
}
|
||||
|
||||
static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_diff;
|
||||
|
||||
__asm __volatile(
|
||||
"subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t"
|
||||
: [l_diff] "=r" (l_diff)
|
||||
: [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
|
||||
);
|
||||
|
||||
return l_diff;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
|
||||
int bits = 0;
|
||||
int i32 = 32;
|
||||
|
||||
__asm __volatile(
|
||||
"clz %[bits], %[n] \n\t"
|
||||
"subu %[bits], %[i32], %[bits] \n\t"
|
||||
: [bits] "=&r" (bits)
|
||||
: [n] "r" (n), [i32] "r" (i32)
|
||||
);
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
static __inline int WebRtcSpl_NormW32(int32_t a) {
|
||||
int zeros = 0;
|
||||
|
||||
__asm __volatile(
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"bnez %[a], 1f \n\t"
|
||||
" sra %[zeros], %[a], 31 \n\t"
|
||||
"b 2f \n\t"
|
||||
" move %[zeros], $zero \n\t"
|
||||
"1: \n\t"
|
||||
"xor %[zeros], %[a], %[zeros] \n\t"
|
||||
"clz %[zeros], %[zeros] \n\t"
|
||||
"addiu %[zeros], %[zeros], -1 \n\t"
|
||||
"2: \n\t"
|
||||
".set pop \n\t"
|
||||
: [zeros]"=&r"(zeros)
|
||||
: [a] "r" (a)
|
||||
);
|
||||
|
||||
return zeros;
|
||||
}
|
||||
|
||||
static __inline int WebRtcSpl_NormU32(uint32_t a) {
|
||||
int zeros = 0;
|
||||
|
||||
__asm __volatile(
|
||||
"clz %[zeros], %[a] \n\t"
|
||||
: [zeros] "=r" (zeros)
|
||||
: [a] "r" (a)
|
||||
);
|
||||
|
||||
return (zeros & 0x1f);
|
||||
}
|
||||
|
||||
static __inline int WebRtcSpl_NormW16(int16_t a) {
|
||||
int zeros = 0;
|
||||
int a0 = a << 16;
|
||||
|
||||
__asm __volatile(
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"bnez %[a0], 1f \n\t"
|
||||
" sra %[zeros], %[a0], 31 \n\t"
|
||||
"b 2f \n\t"
|
||||
" move %[zeros], $zero \n\t"
|
||||
"1: \n\t"
|
||||
"xor %[zeros], %[a0], %[zeros] \n\t"
|
||||
"clz %[zeros], %[zeros] \n\t"
|
||||
"addiu %[zeros], %[zeros], -1 \n\t"
|
||||
"2: \n\t"
|
||||
".set pop \n\t"
|
||||
: [zeros]"=&r"(zeros)
|
||||
: [a0] "r" (a0)
|
||||
);
|
||||
|
||||
return zeros;
|
||||
}
|
||||
|
||||
static __inline int32_t WebRtc_MulAccumW16(int16_t a,
|
||||
int16_t b,
|
||||
int32_t c) {
|
||||
int32_t res = 0, c1 = 0;
|
||||
__asm __volatile(
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a], %[a] \n\t"
|
||||
"seh %[b], %[b] \n\t"
|
||||
#else
|
||||
"sll %[a], %[a], 16 \n\t"
|
||||
"sll %[b], %[b], 16 \n\t"
|
||||
"sra %[a], %[a], 16 \n\t"
|
||||
"sra %[b], %[b], 16 \n\t"
|
||||
#endif
|
||||
"mul %[res], %[a], %[b] \n\t"
|
||||
"addu %[c1], %[c], %[res] \n\t"
|
||||
: [c1] "=r" (c1), [res] "=&r" (res)
|
||||
: [a] "r" (a), [b] "r" (b), [c] "r" (c)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return (c1);
|
||||
}
|
||||
|
||||
#endif // WEBRTC_SPL_SPL_INL_MIPS_H_
|
@ -529,12 +529,14 @@ TEST_F(SplTest, CrossCorrelationTest) {
|
||||
// are not bit-exact.
|
||||
const int32_t kExpected[kCrossCorrelationDimension] =
|
||||
{-266947903, -15579555, -171282001};
|
||||
const int32_t* expected = kExpected;
|
||||
#if !defined(MIPS32_LE)
|
||||
const int32_t kExpectedNeon[kCrossCorrelationDimension] =
|
||||
{-266947901, -15579553, -171281999};
|
||||
const int32_t* expected = kExpected;
|
||||
if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) {
|
||||
expected = kExpectedNeon;
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < kCrossCorrelationDimension; ++i) {
|
||||
EXPECT_EQ(expected[i], vector32[i]);
|
||||
}
|
||||
|
@ -82,18 +82,20 @@ static void InitPointersToMIPS() {
|
||||
WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
|
||||
WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
|
||||
WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
|
||||
WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
|
||||
WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
|
||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||
WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
|
||||
WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
|
||||
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
|
||||
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
|
||||
#else
|
||||
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
207
webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
Normal file
207
webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
Normal file
@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
|
||||
* license.
|
||||
*
|
||||
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
|
||||
* Date: Fri, Jun 24, 2011 at 3:20 AM
|
||||
* Subject: Re: sqrt routine
|
||||
* To: Kevin Ma <kma@google.com>
|
||||
* Hi Kevin,
|
||||
* Thanks for asking. Those routines are public domain (originally posted to
|
||||
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
|
||||
* Cheers,
|
||||
* Wilco
|
||||
*
|
||||
* ----- Original Message -----
|
||||
* From: "Kevin Ma" <kma@google.com>
|
||||
* To: <Wilco.Dijkstra@ntlworld.com>
|
||||
* Sent: Thursday, June 23, 2011 11:44 PM
|
||||
* Subject: Fwd: sqrt routine
|
||||
* Hi Wilco,
|
||||
* I saw your sqrt routine from several web sites, including
|
||||
* http://www.finesse.demon.co.uk/steven/sqrt.html.
|
||||
* Just wonder if there's any copyright information with your Successive
|
||||
* approximation routines, or if I can freely use it for any purpose.
|
||||
* Thanks.
|
||||
* Kevin
|
||||
*/
|
||||
|
||||
// Minor modifications in code style for WebRTC, 2012.
|
||||
// Code optimizations for MIPS, 2013.
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
|
||||
* Algorithm:
|
||||
* Successive approximation of the equation (root + delta) ^ 2 = N
|
||||
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
|
||||
* Use delta = 2^i for i = 15 .. 0.
|
||||
*
|
||||
* Output precision is 16 bits. Note for large input values (close to
|
||||
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
|
||||
* contains the MSB information (a non-sign value). Do with caution
|
||||
* if you need to cast the output to int16_t type.
|
||||
*
|
||||
* If the input value is negative, it returns 0.
|
||||
*/
|
||||
|
||||
|
||||
int32_t WebRtcSpl_SqrtFloor(int32_t value)
|
||||
{
|
||||
int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
|
||||
|
||||
__asm __volatile(
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"lui %[tmp1], 0x4000 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"sub %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"lui %[tmp1], 0x1 \n\t"
|
||||
"or %[tmp4], %[root], %[tmp1] \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x4000 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 14 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x8000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x2000 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 13 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x4000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x1000 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 12 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x2000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x800 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 11 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x1000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x400 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 10 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x800 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x200 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 9 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x400 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x100 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 8 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x200 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x80 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 7 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x100 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x40 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 6 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x80 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x20 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 5 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x40 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x10 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 4 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x20 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x8 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 3 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x10 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x4 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 2 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x8 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x2 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 1 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x4 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x1 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x2 \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [root] "+r" (root), [value] "+r" (value),
|
||||
[tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
|
||||
[tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
|
||||
:
|
||||
);
|
||||
|
||||
return root >> 1;
|
||||
}
|
||||
|
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains implementations of the functions
|
||||
* WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
|
||||
int16_t in_vector1_scale,
|
||||
const int16_t* in_vector2,
|
||||
int16_t in_vector2_scale,
|
||||
int right_shifts,
|
||||
int16_t* out_vector,
|
||||
int length) {
|
||||
int16_t r0 = 0, r1 = 0;
|
||||
int16_t *in1 = (int16_t*)in_vector1;
|
||||
int16_t *in2 = (int16_t*)in_vector2;
|
||||
int16_t *out = out_vector;
|
||||
int i = 0, value32 = 0;
|
||||
|
||||
if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
|
||||
length <= 0 || right_shifts < 0) {
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < length; i++) {
|
||||
__asm __volatile (
|
||||
"lh %[r0], 0(%[in1]) \n\t"
|
||||
"lh %[r1], 0(%[in2]) \n\t"
|
||||
"mult %[r0], %[in_vector1_scale] \n\t"
|
||||
"madd %[r1], %[in_vector2_scale] \n\t"
|
||||
"extrv_r.w %[value32], $ac0, %[right_shifts] \n\t"
|
||||
"addiu %[in1], %[in1], 2 \n\t"
|
||||
"addiu %[in2], %[in2], 2 \n\t"
|
||||
"sh %[value32], 0(%[out]) \n\t"
|
||||
"addiu %[out], %[out], 2 \n\t"
|
||||
: [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
|
||||
[in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
|
||||
: [in_vector1_scale] "r" (in_vector1_scale),
|
||||
[in_vector2_scale] "r" (in_vector2_scale),
|
||||
[right_shifts] "r" (right_shifts)
|
||||
: "hi", "lo", "memory"
|
||||
);
|
||||
}
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user