diff --git a/webrtc/common_audio/common_audio.gyp b/webrtc/common_audio/common_audio.gyp index acf8491e4..131868484 100644 --- a/webrtc/common_audio/common_audio.gyp +++ b/webrtc/common_audio/common_audio.gyp @@ -116,17 +116,28 @@ }], ['target_arch=="mipsel"', { 'sources': [ + 'signal_processing/include/spl_inl_mips.h', 'signal_processing/complex_bit_reverse_mips.c', 'signal_processing/complex_fft_mips.c', + 'signal_processing/cross_correlation_mips.c', 'signal_processing/downsample_fast_mips.c', 'signal_processing/filter_ar_fast_q12_mips.c', 'signal_processing/min_max_operations_mips.c', 'signal_processing/resample_by_2_mips.c', + 'signal_processing/spl_sqrt_floor_mips.c', ], 'sources!': [ 'signal_processing/complex_bit_reverse.c', 'signal_processing/complex_fft.c', 'signal_processing/filter_ar_fast_q12.c', + 'signal_processing/spl_sqrt_floor.c', + ], + 'conditions': [ + ['mips_dsp_rev>0', { + 'sources': [ + 'signal_processing/vector_scaling_operations_mips.c', + ], + }], ], }], ], # conditions diff --git a/webrtc/common_audio/signal_processing/cross_correlation_mips.c b/webrtc/common_audio/signal_processing/cross_correlation_mips.c new file mode 100644 index 000000000..7d9a6c644 --- /dev/null +++ b/webrtc/common_audio/signal_processing/cross_correlation_mips.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+// out[i] = sum over j < dim_seq of (seq1[j] * seq2[i * step_seq2 + j]) >> right_shifts.
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,     // out: one 32-bit sum per lag
+                                     const int16_t* seq1,            // in: dim_seq samples
+                                     const int16_t* seq2,            // in: window start moves per lag
+                                     int16_t dim_seq,                // samples per sum
+                                     int16_t dim_cross_correlation,  // number of sums to produce
+                                     int16_t right_shifts,           // shift applied to each product
+                                     int16_t step_seq2) {            // seq2 advance per lag, in elements
+  // t0..t3: loaded samples / products; sum: accumulator for the current lag.
+  int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
+  const int16_t *pseq2 = NULL;
+  const int16_t *pseq1 = NULL;
+  const int16_t *pseq1_0 = seq1;  // start of seq1, reloaded for every lag
+  const int16_t *pseq2_0 = seq2;  // start of the current seq2 window
+  int k = 0;
+  if (dim_cross_correlation <= 0 || dim_seq <= 0) return;  // asm loops are bottom-tested; nothing is written
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "sll %[step_seq2], %[step_seq2], 1 \n\t"  // element step -> byte step
+    "andi %[t0], %[dim_seq], 1 \n\t"
+    "bgtz %[t0], 3f \n\t"                     // odd dim_seq is handled from label 3
+    " nop \n\t"
+   "1: \n\t"                                  // even dim_seq: one pass per output
+    "move %[pseq1], %[pseq1_0] \n\t"
+    "move %[pseq2], %[pseq2_0] \n\t"
+    "sra %[k], %[dim_seq], 1 \n\t"            // k = number of sample pairs
+    "addiu %[dim_cc], %[dim_cc], -1 \n\t"
+    "xor %[sum], %[sum], %[sum] \n\t"
+   "2: \n\t"                                  // two products per iteration
+    "lh %[t0], 0(%[pseq1]) \n\t"
+    "lh %[t1], 0(%[pseq2]) \n\t"
+    "lh %[t2], 2(%[pseq1]) \n\t"
+    "lh %[t3], 2(%[pseq2]) \n\t"
+    "mul %[t0], %[t0], %[t1] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "mul %[t2], %[t2], %[t3] \n\t"
+    "addiu %[pseq1], %[pseq1], 4 \n\t"
+    "addiu %[pseq2], %[pseq2], 4 \n\t"
+    "srav %[t0], %[t0], %[right_shifts] \n\t"  // each product is shifted before accumulation
+    "addu %[sum], %[sum], %[t0] \n\t"
+    "srav %[t2], %[t2], %[right_shifts] \n\t"
+    "bgtz %[k], 2b \n\t"
+    " addu %[sum], %[sum], %[t2] \n\t"        // branch delay slot
+    "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
+    "sw %[sum], 0(%[cc]) \n\t"
+    "bgtz %[dim_cc], 1b \n\t"
+    " addiu %[cc], %[cc], 4 \n\t"             // branch delay slot
+    "b 6f \n\t"
+    " nop \n\t"
+   "3: \n\t"                                  // odd dim_seq: pairs first, then one leftover sample
+    "move %[pseq1], %[pseq1_0] \n\t"
+    "move %[pseq2], %[pseq2_0] \n\t"
+    "sra %[k], %[dim_seq], 1 \n\t"
+    "addiu %[dim_cc], %[dim_cc], -1 \n\t"
+    "beqz %[k], 5f \n\t"                      // dim_seq == 1: no pairs at all
+    " xor %[sum], %[sum], %[sum] \n\t"        // branch delay slot
+   "4: \n\t"
+    "lh %[t0], 0(%[pseq1]) \n\t"
+    "lh %[t1], 0(%[pseq2]) \n\t"
+    "lh %[t2], 2(%[pseq1]) \n\t"
+    "lh %[t3], 2(%[pseq2]) \n\t"
+    "mul %[t0], %[t0], %[t1] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "mul %[t2], %[t2], %[t3] \n\t"
+    "addiu %[pseq1], %[pseq1], 4 \n\t"
+    "addiu %[pseq2], %[pseq2], 4 \n\t"
+    "srav %[t0], %[t0], %[right_shifts] \n\t"
+    "addu %[sum], %[sum], %[t0] \n\t"
+    "srav %[t2], %[t2], %[right_shifts] \n\t"
+    "bgtz %[k], 4b \n\t"
+    " addu %[sum], %[sum], %[t2] \n\t"        // branch delay slot
+   "5: \n\t"                                  // leftover sample of the odd-length case
+    "lh %[t0], 0(%[pseq1]) \n\t"
+    "lh %[t1], 0(%[pseq2]) \n\t"
+    "mul %[t0], %[t0], %[t1] \n\t"
+    "srav %[t0], %[t0], %[right_shifts] \n\t"
+    "addu %[sum], %[sum], %[t0] \n\t"
+    "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
+    "sw %[sum], 0(%[cc]) \n\t"
+    "bgtz %[dim_cc], 3b \n\t"
+    " addiu %[cc], %[cc], 4 \n\t"             // branch delay slot
+   "6: \n\t"
+    ".set pop \n\t"
+    : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
+      [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
+      [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
+      [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
+      [cc] "+r" (cross_correlation)
+    : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
+    : "hi", "lo", "memory"
+  );
+}
diff --git a/webrtc/common_audio/signal_processing/include/signal_processing_library.h b/webrtc/common_audio/signal_processing/include/signal_processing_library.h index 887400cb2..c567beba5 100644 --- a/webrtc/common_audio/signal_processing/include/signal_processing_library.h +++ b/webrtc/common_audio/signal_processing/include/signal_processing_library.h @@ -73,6 +73,8 @@ #ifndef WEBRTC_ARCH_ARM_V7 // For ARMv7 platforms, these are inline functions in spl_inl_armv7.h +#ifndef MIPS32_LE +// For MIPS platforms, these are inline functions in spl_inl_mips.h #define WEBRTC_SPL_MUL_16_16(a, b) \ ((int32_t) (((int16_t)(a)) * ((int16_t)(b)))) #define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \ @@ -87,6 +89,7 @@ (WEBRTC_SPL_MUL_16_32_RSFT16(( \ (int16_t)((a32 & 0x0000FFFF) >> 1)), b32) >> 15))) #endif +#endif #define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \ ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 5) \ @@ -456,6 +459,15 @@ int WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(const int16_t* in_vector1,
int16_t* out_vector, int length); #endif +#if defined(MIPS_DSP_R1_LE) +int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + int length); +#endif // End: Vector scaling operations. // iLBC specific functions. Implementations in ilbc_specific_functions.c. @@ -627,6 +639,15 @@ void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, int16_t right_shifts, int16_t step_seq2); #endif +#if defined(MIPS32_LE) +void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + int16_t dim_seq, + int16_t dim_cross_correlation, + int16_t right_shifts, + int16_t step_seq2); +#endif // Creates (the first half of) a Hanning window. Size must be at least 1 and // at most 512. diff --git a/webrtc/common_audio/signal_processing/include/spl_inl.h b/webrtc/common_audio/signal_processing/include/spl_inl.h index 7c7f85d83..a4ddb3fa9 100644 --- a/webrtc/common_audio/signal_processing/include/spl_inl.h +++ b/webrtc/common_audio/signal_processing/include/spl_inl.h @@ -19,6 +19,11 @@ #include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h" #else +#if defined(MIPS32_LE) +#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h" +#endif + +#if !defined(MIPS_DSP_R1_LE) static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { int16_t out16 = (int16_t) value32; @@ -37,7 +42,9 @@ static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { return WebRtcSpl_SatW32ToW16((int32_t) var1 - (int32_t) var2); } +#endif // #if !defined(MIPS_DSP_R1_LE) +#if !defined(MIPS32_LE) static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { int bits; @@ -121,11 +128,13 @@ static __inline int WebRtcSpl_NormW16(int16_t a) { static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, 
int32_t c) { return (a * b + c); } +#endif // #if !defined(MIPS32_LE) #endif // WEBRTC_ARCH_ARM_V7 // The following functions have no optimized versions. // TODO(kma): Consider saturating add/sub instructions in X86 platform. +#if !defined(MIPS_DSP_R1_LE) static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { int32_t l_sum; @@ -163,5 +172,6 @@ static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { return l_diff; } +#endif // #if !defined(MIPS_DSP_R1_LE) #endif // WEBRTC_SPL_SPL_INL_H_ diff --git a/webrtc/common_audio/signal_processing/include/spl_inl_mips.h b/webrtc/common_audio/signal_processing/include/spl_inl_mips.h new file mode 100644 index 000000000..e17377778 --- /dev/null +++ b/webrtc/common_audio/signal_processing/include/spl_inl_mips.h @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +// This header file includes the inline functions in +// the fix point signal processing library. 
+
+#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_
+#define WEBRTC_SPL_SPL_INL_MIPS_H_
+// Returns (int16_t)a * (int16_t)b; only the low 16 bits of each argument are used.
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a,
+                                             int32_t b) {
+  int32_t value32 = 0;
+  int32_t a1 = 0, b1 = 0;
+
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh %[a1], %[a] \n\t"   // sign-extend the low halfword (MIPS32R2)
+    "seh %[b1], %[b] \n\t"
+#else
+    "sll %[a1], %[a], 16 \n\t"  // same sign extension via shift pair
+    "sll %[b1], %[b], 16 \n\t"
+    "sra %[a1], %[a1], 16 \n\t"
+    "sra %[b1], %[b1], 16 \n\t"
+#endif
+    "mul %[value32], %[a1], %[b1] \n\t"
+    : [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1)
+    : [a] "r" (a), [b] "r" (b)
+    : "hi", "lo"
+  );
+  return value32;
+}
+// Returns a * (b >> 16) + ((a * ((b & 0xFFFF) >> 1) + 0x4000) >> 15).
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a,
+                                                    int32_t b) {
+  int32_t value32 = 0, b1 = 0, b2 = 0;
+  int32_t a1 = 0;
+
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh %[a1], %[a] \n\t"
+#else
+    "sll %[a1], %[a], 16 \n\t"
+    "sra %[a1], %[a1], 16 \n\t"
+#endif
+    "andi %[b2], %[b], 0xFFFF \n\t"  // b2 = low half of b
+    "sra %[b1], %[b], 16 \n\t"       // b1 = high half of b (signed)
+    "sra %[b2], %[b2], 1 \n\t"
+    "mul %[value32], %[a1], %[b1] \n\t"
+    "mul %[b2], %[a1], %[b2] \n\t"
+    "addiu %[b2], %[b2], 0x4000 \n\t"  // rounding constant for the >> 15 below
+    "sra %[b2], %[b2], 15 \n\t"
+    "addu %[value32], %[value32], %[b2] \n\t"
+    : [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2),
+      [a1] "=&r" (a1)
+    : [a] "r" (a), [b] "r" (b)
+    : "hi", "lo"
+  );
+  return value32;
+}
+// Combines two WEBRTC_SPL_MUL_16_32_RSFT16 partial products of b with the halves of a.
+static __inline int32_t WEBRTC_SPL_MUL_32_32_RSFT32BI(int32_t a,
+                                                      int32_t b) {
+  int32_t tmp = 0;
+
+  if ((32767 < a) || (a < 0))  // otherwise a >> 16 is 0 and the term vanishes
+    tmp = WEBRTC_SPL_MUL_16_32_RSFT16(((int16_t)(a >> 16)), b);
+  tmp += WEBRTC_SPL_MUL_16_32_RSFT16(((int16_t)((a & 0x0000FFFF) >> 1)),
+                                     b) >> 15;
+
+  return tmp;
+}
+// Returns MUL_16_32_RSFT16(a, c) + (MUL_16_32_RSFT16(b, c) >> 16), fused into one asm block.
+static __inline int32_t WEBRTC_SPL_MUL_32_32_RSFT32(int16_t a,
+                                                    int16_t b,
+                                                    int32_t c) {
+  int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
+
+  __asm __volatile(
+    "sra %[tmp1], %[c], 16 \n\t"       // tmp1 = high half of c
+    "andi %[tmp2], %[c], 0xFFFF \n\t"  // tmp2 = low half of c
+#if defined(MIPS32_R2_LE)
+    "seh %[a], %[a] \n\t"
+    "seh %[b], %[b] \n\t"
+#else
+    "sll %[a], %[a], 16 \n\t"
+    "sra %[a], %[a], 16 \n\t"
+    "sll %[b], %[b], 16 \n\t"
+    "sra %[b], %[b], 16 \n\t"
+#endif
+    "sra %[tmp2], %[tmp2], 1 \n\t"
+    "mul %[tmp3], %[a], %[tmp2] \n\t"
+    "mul %[tmp4], %[b], %[tmp2] \n\t"
+    "mul %[tmp2], %[a], %[tmp1] \n\t"
+    "mul %[tmp1], %[b], %[tmp1] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[tmp3], %[tmp3], 15 \n\t"  // rounding shift, replaces the addiu/sra pair
+    "shra_r.w %[tmp4], %[tmp4], 15 \n\t"
+#else
+    "addiu %[tmp3], %[tmp3], 0x4000 \n\t"
+    "sra %[tmp3], %[tmp3], 15 \n\t"
+    "addiu %[tmp4], %[tmp4], 0x4000 \n\t"
+    "sra %[tmp4], %[tmp4], 15 \n\t"
+#endif
+    "addu %[tmp3], %[tmp3], %[tmp2] \n\t"
+    "addu %[tmp4], %[tmp4], %[tmp1] \n\t"
+    "sra %[tmp4], %[tmp4], 16 \n\t"
+    "addu %[tmp1], %[tmp3], %[tmp4] \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
+      [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+      [a] "+r" (a), [b] "+r" (b)
+    : [c] "r" (c)
+    : "hi", "lo"
+  );
+  return tmp1;
+}
+// The saturating helpers below use MIPS DSP ASE instructions.
+#if defined(MIPS_DSP_R1_LE)
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  __asm __volatile(
+    "shll_s.w %[value32], %[value32], 16 \n\t"  // saturating shift left clamps out-of-range input
+    "sra %[value32], %[value32], 16 \n\t"       // back to a sign-extended 16-bit value
+    : [value32] "+r" (value32)
+    :
+  );
+  int16_t out16 = (int16_t)value32;
+  return out16;
+}
+// Saturating 16-bit add: the low halfword of addq_s.ph is returned.
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  int32_t value32 = 0;
+
+  __asm __volatile(
+    "addq_s.ph %[value32], %[a], %[b] \n\t"
+    : [value32] "=r" (value32)
+    : [a] "r" (a), [b] "r" (b)
+  );
+  return (int16_t)value32;
+}
+// Saturating 32-bit add (addq_s.w).
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sum;
+
+  __asm __volatile(
+    "addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t"
+    : [l_sum] "=r" (l_sum)
+    : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+  );
+
+  return l_sum;
+}
+// Saturating 16-bit subtract: the low halfword of subq_s.ph is returned.
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  int32_t value32;
+
+  __asm __volatile(
+    "subq_s.ph %[value32], %[var1], %[var2] \n\t"
+    : [value32] "=r" (value32)
+    : [var1] "r" (var1), [var2] "r" (var2)
+  );
+
+  return (int16_t)value32;
+}
+// Saturating 32-bit subtract (subq_s.w).
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_diff;
+
+  __asm __volatile(
+    "subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t"
+    : [l_diff] "=r" (l_diff)
+    : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+  );
+
+  return l_diff;
+}
+#endif
+// Returns 32 minus the number of leading zero bits of n.
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  int bits = 0;
+  int i32 = 32;
+
+  __asm __volatile(
+    "clz %[bits], %[n] \n\t"
+    "subu %[bits], %[i32], %[bits] \n\t"
+    : [bits] "=&r" (bits)
+    : [n] "r" (n), [i32] "r" (i32)
+  );
+
+  return bits;
+}
+// Returns clz(a ^ (a >> 31)) - 1, or 0 when a == 0.
+static __inline int WebRtcSpl_NormW32(int32_t a) {
+  int zeros = 0;
+
+  __asm __volatile(
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "bnez %[a], 1f \n\t"
+    " sra %[zeros], %[a], 31 \n\t"    // delay slot: zeros = sign mask of a
+    "b 2f \n\t"
+    " move %[zeros], $zero \n\t"      // delay slot: a == 0 yields 0
+   "1: \n\t"
+    "xor %[zeros], %[a], %[zeros] \n\t"  // complements a when it is negative
+    "clz %[zeros], %[zeros] \n\t"
+    "addiu %[zeros], %[zeros], -1 \n\t"
+   "2: \n\t"
+    ".set pop \n\t"
+    : [zeros]"=&r"(zeros)
+    : [a] "r" (a)
+  );
+
+  return zeros;
+}
+// Returns the leading-zero count of a masked to 0..31, so a == 0 yields 0.
+static __inline int WebRtcSpl_NormU32(uint32_t a) {
+  int zeros = 0;
+
+  __asm __volatile(
+    "clz %[zeros], %[a] \n\t"
+    : [zeros] "=r" (zeros)
+    : [a] "r" (a)
+  );
+
+  return (zeros & 0x1f);
+}
+// Same computation as WebRtcSpl_NormW32, applied to a placed in the upper halfword.
+static __inline int WebRtcSpl_NormW16(int16_t a) {
+  int zeros = 0;
+  int a0 = a * (1 << 16);  // same value as a << 16, but defined for negative a
+
+  __asm __volatile(
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "bnez %[a0], 1f \n\t"
+    " sra %[zeros], %[a0], 31 \n\t"   // delay slot: zeros = sign mask of a0
+    "b 2f \n\t"
+    " move %[zeros], $zero \n\t"      // delay slot: a == 0 yields 0
+   "1: \n\t"
+    "xor %[zeros], %[a0], %[zeros] \n\t"
+    "clz %[zeros], %[zeros] \n\t"
+    "addiu %[zeros], %[zeros], -1 \n\t"
+   "2: \n\t"
+    ".set pop \n\t"
+    : [zeros]"=&r"(zeros)
+    : [a0] "r" (a0)
+  );
+
+  return zeros;
+}
+// Returns a * b + c.
+static __inline int32_t WebRtc_MulAccumW16(int16_t a,
+                                           int16_t b,
+                                           int32_t c) {
+  int32_t res = 0, c1 = 0;
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh %[a], %[a] \n\t"
+    "seh %[b], %[b] \n\t"
+#else
+    "sll %[a], %[a], 16 \n\t"
+    "sll %[b], %[b], 16 \n\t"
+    "sra %[a], %[a], 16 \n\t"
+    "sra %[b], %[b], 16 \n\t"
+#endif
+    "mul %[res], %[a], %[b] \n\t"
+    "addu %[c1], %[c], %[res] \n\t"
+    : [c1] "=r" (c1), [res] "=&r" (res), [a] "+r" (a), [b] "+r" (b)  // a and b are rewritten above
+    : [c] "r" (c)
+    : "hi", "lo"
+  );
+  return (c1);
+}
+
+#endif // WEBRTC_SPL_SPL_INL_MIPS_H_
diff --git a/webrtc/common_audio/signal_processing/signal_processing_unittest.cc b/webrtc/common_audio/signal_processing/signal_processing_unittest.cc index a83a790d7..a1bf0d5e8 100644 --- a/webrtc/common_audio/signal_processing/signal_processing_unittest.cc +++ b/webrtc/common_audio/signal_processing/signal_processing_unittest.cc @@ -529,12 +529,14 @@ TEST_F(SplTest, CrossCorrelationTest) { // are not bit-exact. const int32_t kExpected[kCrossCorrelationDimension] = {-266947903, -15579555, -171282001}; + const int32_t* expected = kExpected; +#if !defined(MIPS32_LE) const int32_t kExpectedNeon[kCrossCorrelationDimension] = {-266947901, -15579553, -171281999}; - const int32_t* expected = kExpected; if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) { expected = kExpectedNeon; } +#endif for (int i = 0; i < kCrossCorrelationDimension; ++i) { EXPECT_EQ(expected[i], vector32[i]); } diff --git a/webrtc/common_audio/signal_processing/spl_init.c b/webrtc/common_audio/signal_processing/spl_init.c index 4387cc876..454e13ba9 100644 --- a/webrtc/common_audio/signal_processing/spl_init.c +++ b/webrtc/common_audio/signal_processing/spl_init.c @@ -82,18 +82,20 @@ static void InitPointersToMIPS() { WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; - WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; - WebRtcSpl_ScaleAndAddVectorsWithRound = - WebRtcSpl_ScaleAndAddVectorsWithRoundC; WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC; WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC; WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC; WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
#if defined(MIPS_DSP_R1_LE) WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRound_mips; #else WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; #endif } #endif diff --git a/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c b/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c new file mode 100644 index 000000000..8716459b1 --- /dev/null +++ b/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c @@ -0,0 +1,207 @@ +/* + * Written by Wilco Dijkstra, 1996. The following email exchange establishes the + * license. + * + * From: Wilco Dijkstra + * Date: Fri, Jun 24, 2011 at 3:20 AM + * Subject: Re: sqrt routine + * To: Kevin Ma + * Hi Kevin, + * Thanks for asking. Those routines are public domain (originally posted to + * comp.sys.arm a long time ago), so you can use them freely for any purpose. + * Cheers, + * Wilco + * + * ----- Original Message ----- + * From: "Kevin Ma" + * To: + * Sent: Thursday, June 23, 2011 11:44 PM + * Subject: Fwd: sqrt routine + * Hi Wilco, + * I saw your sqrt routine from several web sites, including + * http://www.finesse.demon.co.uk/steven/sqrt.html. + * Just wonder if there's any copyright information with your Successive + * approximation routines, or if I can freely use it for any purpose. + * Thanks. + * Kevin + */ + +// Minor modifications in code style for WebRTC, 2012. +// Code optimizations for MIPS, 2013. + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +/* + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. 
Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Do with caution
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+// Each asm group handles one bit N = 15..0: if value >= (root + 2^N) << N, that amount is
+// subtracted from value and bit N+1 of root is set. The function returns root >> 1.
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+  int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
+
+  __asm __volatile(
+    ".set push \n\t"
+    ".set noreorder \n\t"
+
+    "lui %[tmp1], 0x4000 \n\t"                // N = 15: tmp1 = 0x40000000
+    "slt %[tmp2], %[value], %[tmp1] \n\t"     // tmp2 = 1 when the trial value is too large
+    "subu %[tmp3], %[value], %[tmp1] \n\t"    // subu, not sub: must not trap for negative value
+    "lui %[tmp1], 0x1 \n\t"
+    "or %[tmp4], %[root], %[tmp1] \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"    // commit only when tmp2 == 0
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x4000 \n\t"          // N = 14
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 14 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x8000 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x2000 \n\t"          // N = 13
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 13 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x4000 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x1000 \n\t"          // N = 12
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 12 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x2000 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x800 \n\t"           // N = 11
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 11 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x1000 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x400 \n\t"           // N = 10
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 10 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x800 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x200 \n\t"           // N = 9
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 9 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x400 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x100 \n\t"           // N = 8
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 8 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x200 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x80 \n\t"            // N = 7
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 7 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x100 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x40 \n\t"            // N = 6
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 6 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x80 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x20 \n\t"            // N = 5
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 5 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x40 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x10 \n\t"            // N = 4
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 4 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x20 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x8 \n\t"             // N = 3
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 3 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x10 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x4 \n\t"             // N = 2
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 2 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x8 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x2 \n\t"             // N = 1
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "sll %[tmp1], 1 \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "subu %[tmp3], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x4 \n\t"
+    "movz %[value], %[tmp3], %[tmp2] \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    "addiu %[tmp1], $0, 0x1 \n\t"             // N = 0: value is not needed afterwards
+    "addu %[tmp1], %[tmp1], %[root] \n\t"
+    "slt %[tmp2], %[value], %[tmp1] \n\t"
+    "ori %[tmp4], %[root], 0x2 \n\t"
+    "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+    ".set pop \n\t"
+
+    : [root] "+r" (root), [value] "+r" (value),
+      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
+      [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+    :
+  );
+
+  return root >> 1;
+}
+
diff --git a/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c b/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c new file mode 100644 index 000000000..5ddcd2df7 --- /dev/null +++ b/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+// Stores, per element, the rounded right-shifted sum of the two scaled inputs; returns 0, or -1 on bad arguments.
+int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,  // first input, `length` samples
+                                               int16_t in_vector1_scale,   // multiplier for in_vector1
+                                               const int16_t* in_vector2,  // second input, `length` samples
+                                               int16_t in_vector2_scale,   // multiplier for in_vector2
+                                               int right_shifts,           // shift applied to the summed products
+                                               int16_t* out_vector,        // output, `length` samples
+                                               int length) {               // number of samples to process
+  int16_t r0 = 0, r1 = 0;  /* samples loaded by the asm block */
+  int16_t *in1 = (int16_t*)in_vector1;
+  int16_t *in2 = (int16_t*)in_vector2;
+  int16_t *out = out_vector;
+  int i = 0, value32 = 0;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length <= 0 || right_shifts < 0) {
+    return -1;
+  }
+  for (i = 0; i < length; i++) {
+    __asm __volatile (
+      "lh %[r0], 0(%[in1]) \n\t"
+      "lh %[r1], 0(%[in2]) \n\t"
+      "mult %[r0], %[in_vector1_scale] \n\t"   /* hi/lo = in1 sample * scale1 */
+      "madd %[r1], %[in_vector2_scale] \n\t"   /* hi/lo += in2 sample * scale2 */
+      "extrv_r.w %[value32], $ac0, %[right_shifts] \n\t"  /* DSP ASE: shift right with rounding */
+      "addiu %[in1], %[in1], 2 \n\t"
+      "addiu %[in2], %[in2], 2 \n\t"
+      "sh %[value32], 0(%[out]) \n\t"          /* only the low halfword is stored */
+      "addiu %[out], %[out], 2 \n\t"
+      : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
+        [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
+      : [in_vector1_scale] "r" (in_vector1_scale),
+        [in_vector2_scale] "r" (in_vector2_scale),
+        [right_shifts] "r" (right_shifts)
+      : "hi", "lo", "memory"
+    );
+  }
+  return 0;
+}