diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h
index dd85090b4..efed76709 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h
@@ -166,4 +166,24 @@ void WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[],
                                       const int matrix0_index_step);
 #endif
 
+#if defined(MIPS32_LE)  // MIPS32 versions; see entropy_coding_mips.c.
+void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix1_index_factor1,
+                                      const int matrix0_index_factor1,
+                                      const int matrix1_index_init_case,
+                                      const int matrix1_index_step,
+                                      const int matrix0_index_step,
+                                      const int inner_loop_count,
+                                      const int mid_loop_count,
+                                      const int shift);
+
+void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix0_index_factor,
+                                      const int matrix0_index_step);
+#endif  // defined(MIPS32_LE)
+
 #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_ENTROPY_CODING_H_
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_mips.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_mips.c
new file mode 100644
index 000000000..a66a43ef9
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_mips.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/codecs/isac/fix/source/entropy_coding.h"
+#include "modules/audio_coding/codecs/isac/fix/source/settings.h"
+
+// MIPS inline-assembly version of WebRtcIsacfix_MatrixProduct1, bit-exact
+// with WebRtcIsacfix_MatrixProduct1C in entropy_coding.c. The asm loops test
+// their counters at the bottom, so every loop body executes at least once.
+void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix1_index_factor1,
+                                      const int matrix0_index_factor1,
+                                      const int matrix1_index_init_case,
+                                      const int matrix1_index_step,
+                                      const int matrix0_index_step,
+                                      const int inner_loop_count,
+                                      const int mid_loop_count,
+                                      const int shift) {
+  if (matrix1_index_init_case != 0) {  // matrix1 rewinds for every output row.
+    int j = SUBFRAMES, k = 0, n = 0;  // Loop countdowns: outer, mid, inner.
+    int32_t r0, r1, r2, sum32;  // asm scratch values and the accumulator.
+    int32_t* product_start = matrix_product;  // Start of current output row.
+    int32_t* product_ptr;  // Next output element to store.
+    const uint32_t product_step = 4 * mid_loop_count;  // Output row, in bytes.
+    const uint32_t matrix0_step = 2 * matrix0_index_step;  // Bytes (int16_t).
+    const uint32_t matrix1_step = 4 * matrix1_index_step;  // Bytes (int32_t).
+    const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;  // Bytes.
+    const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;  // Bytes.
+    const int16_t* matrix0_start = matrix0;
+    const int32_t* matrix1_start = matrix1;
+    int16_t* matrix0_ptr;
+    int32_t* matrix1_ptr;
+
+    __asm __volatile (
+      ".set push \n\t"
+      ".set noreorder \n\t"  // Line after each bgtz is its delay slot.
+     "1: \n\t"  // Outer loop: one output row per iteration.
+      "addu %[product_ptr], %[product_start], $0 \n\t"
+      "addu %[k], %[product_step], $0 \n\t"  // k = bytes left in this row.
+      "addiu %[j], %[j], -1 \n\t"
+      "addu %[matrix1_start], %[matrix1], $0 \n\t"  // Rewind matrix1.
+     "2: \n\t"  // Mid loop: one output element per iteration.
+      "addu %[matrix1_ptr], %[matrix1_start], $0 \n\t"
+      "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t"
+      "addu %[n], %[inner_loop_count], $0 \n\t"
+      "mul %[sum32], $0, $0 \n\t"  // sum32 = 0.
+     "3: \n\t"  // Inner loop: accumulate one product per iteration.
+      "lw %[r0], 0(%[matrix1_ptr]) \n\t"
+      "lh %[r1], 0(%[matrix0_ptr]) \n\t"
+      "addu %[matrix1_ptr], %[matrix1_ptr], %[matrix1_step] \n\t"
+      "sllv %[r0], %[r0], %[shift] \n\t"  // r0 = matrix1 value << shift.
+      "andi %[r2], %[r0], 0xffff \n\t"  // r2 = low 16 bits of r0.
+      "sra %[r2], %[r2], 1 \n\t"
+      "mul %[r2], %[r2], %[r1] \n\t"  // r2 = (low16 >> 1) * matrix0 value.
+      "sra %[r0], %[r0], 16 \n\t"
+      "mul %[r0], %[r0], %[r1] \n\t"  // r0 = (r0 >> 16) * matrix0 value.
+      "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t"
+      "addiu %[n], %[n], -1 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+      "shra_r.w %[r2], %[r2], 15 \n\t"  // Rounding arithmetic shift (DSP ASE).
+#else
+      "addiu %[r2], %[r2], 0x4000 \n\t"  // Round, then shift right by 15.
+      "sra %[r2], %[r2], 15 \n\t"
+#endif
+      "addu %[sum32], %[sum32], %[r2] \n\t"
+      "bgtz %[n], 3b \n\t"
+      " addu %[sum32], %[sum32], %[r0] \n\t"
+      "addiu %[k], %[k], -4 \n\t"  // One int32_t output consumed.
+      "addu %[matrix1_start], %[matrix1_start], %[matrix1_step2] \n\t"
+      "sw %[sum32], 0(%[product_ptr]) \n\t"  // Store the finished sum.
+      "bgtz %[k], 2b \n\t"
+      " addiu %[product_ptr], %[product_ptr], 4 \n\t"
+      "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t"
+      "bgtz %[j], 1b \n\t"
+      " addu %[product_start], %[product_start], %[product_step] \n\t"
+      ".set pop \n\t"
+      : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
+        [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start),
+        [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
+        [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
+        [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
+      : [product_step] "r" (product_step), [matrix1] "r" (matrix1),
+        [inner_loop_count] "r" (inner_loop_count),
+        [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
+        [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
+        [matrix0_step2] "r" (matrix0_step2)
+      : "hi", "lo", "memory"
+    );
+  } else {  // matrix0 rewinds for every output row.
+    int j = SUBFRAMES, k = 0, n = 0;  // Loop countdowns: outer, mid, inner.
+    int32_t r0, r1, r2, sum32;  // asm scratch values and the accumulator.
+    int32_t* product_start = matrix_product;  // Start of current output row.
+    int32_t* product_ptr;  // Next output element to store.
+    const uint32_t product_step = 4 * mid_loop_count;  // Output row, in bytes.
+    const uint32_t matrix0_step = 2 * matrix0_index_step;  // Bytes (int16_t).
+    const uint32_t matrix1_step = 4 * matrix1_index_step;  // Bytes (int32_t).
+    const uint32_t matrix0_step2 = 2 * matrix0_index_factor1;  // Bytes.
+    const uint32_t matrix1_step2 = 4 * matrix1_index_factor1;  // Bytes.
+    const int16_t* matrix0_start = matrix0;
+    const int32_t* matrix1_start = matrix1;
+    int16_t* matrix0_ptr;
+    int32_t* matrix1_ptr;
+
+    __asm __volatile (
+      ".set push \n\t"
+      ".set noreorder \n\t"  // Line after each bgtz is its delay slot.
+     "1: \n\t"  // Outer loop: one output row per iteration.
+      "addu %[product_ptr], %[product_start], $0 \n\t"
+      "addu %[k], %[product_step], $0 \n\t"  // k = bytes left in this row.
+      "addiu %[j], %[j], -1 \n\t"
+      "addu %[matrix0_start], %[matrix0], $0 \n\t"  // Rewind matrix0.
+     "2: \n\t"  // Mid loop: one output element per iteration.
+      "addu %[matrix1_ptr], %[matrix1_start], $0 \n\t"
+      "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t"
+      "addu %[n], %[inner_loop_count], $0 \n\t"
+      "mul %[sum32], $0, $0 \n\t"  // sum32 = 0.
+     "3: \n\t"  // Inner loop: accumulate one product per iteration.
+      "lw %[r0], 0(%[matrix1_ptr]) \n\t"
+      "lh %[r1], 0(%[matrix0_ptr]) \n\t"
+      "addu %[matrix1_ptr], %[matrix1_ptr], %[matrix1_step] \n\t"
+      "sllv %[r0], %[r0], %[shift] \n\t"  // r0 = matrix1 value << shift.
+      "andi %[r2], %[r0], 0xffff \n\t"  // r2 = low 16 bits of r0.
+      "sra %[r2], %[r2], 1 \n\t"
+      "mul %[r2], %[r2], %[r1] \n\t"  // r2 = (low16 >> 1) * matrix0 value.
+      "sra %[r0], %[r0], 16 \n\t"
+      "mul %[r0], %[r0], %[r1] \n\t"  // r0 = (r0 >> 16) * matrix0 value.
+      "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t"
+      "addiu %[n], %[n], -1 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+      "shra_r.w %[r2], %[r2], 15 \n\t"  // Rounding arithmetic shift (DSP ASE).
+#else
+      "addiu %[r2], %[r2], 0x4000 \n\t"  // Round, then shift right by 15.
+      "sra %[r2], %[r2], 15 \n\t"
+#endif
+      "addu %[sum32], %[sum32], %[r2] \n\t"
+      "bgtz %[n], 3b \n\t"
+      " addu %[sum32], %[sum32], %[r0] \n\t"
+      "addiu %[k], %[k], -4 \n\t"  // One int32_t output consumed.
+      "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t"
+      "sw %[sum32], 0(%[product_ptr]) \n\t"  // Store the finished sum.
+      "bgtz %[k], 2b \n\t"
+      " addiu %[product_ptr], %[product_ptr], 4 \n\t"
+      "addu %[matrix1_start], %[matrix1_start], %[matrix1_step2] \n\t"
+      "bgtz %[j], 1b \n\t"
+      " addu %[product_start], %[product_start], %[product_step] \n\t"
+      ".set pop \n\t"
+      : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start),
+        [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start),
+        [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
+        [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0),
+        [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2)
+      : [product_step] "r" (product_step), [matrix0] "r" (matrix0),
+        [inner_loop_count] "r" (inner_loop_count),
+        [matrix1_step] "r" (matrix1_step), [shift] "r" (shift),
+        [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2),
+        [matrix0_step2] "r" (matrix0_step2)
+      : "hi", "lo", "memory"
+    );
+  }
+}
+
+// MIPS inline-assembly version of WebRtcIsacfix_MatrixProduct2, bit-exact
+// with WebRtcIsacfix_MatrixProduct2C in entropy_coding.c. Each outer pass
+// stores two outputs, each a SUBFRAMES-term sum shifted right by 3.
+void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix0_index_factor,
+                                      const int matrix0_index_step) {
+  int j = 0, n = 0;  // Outer/inner countdowns; both are set inside the asm.
+  int loop_count = SUBFRAMES;  // Trip count used for both loops.
+  const int16_t* matrix0_ptr;
+  const int32_t* matrix1_ptr;
+  const int16_t* matrix0_start = matrix0;
+  const int matrix0_step = 2 * matrix0_index_step;  // Bytes (int16_t).
+  const int matrix0_step2 = 2 * matrix0_index_factor;  // Bytes (int16_t).
+  int32_t r0, r1, r2, r3, r4, sum32, sum32_2;  // Scratch and two accumulators.
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"  // Line after each bgtz is its delay slot.
+    "addu %[j], %[loop_count], $0 \n\t"
+    "addu %[matrix0_start], %[matrix0], $0 \n\t"
+   "1: \n\t"  // Outer loop: two outputs per iteration.
+    "addu %[matrix1_ptr], %[matrix1], $0 \n\t"  // Rewind matrix1.
+    "addu %[matrix0_ptr], %[matrix0_start], $0 \n\t"
+    "addu %[n], %[loop_count], $0 \n\t"
+    "mul %[sum32], $0, $0 \n\t"  // sum32 = 0.
+    "mul %[sum32_2], $0, $0 \n\t"  // sum32_2 = 0.
+   "2: \n\t"  // Inner loop: one matrix0 value times two matrix1 values.
+    "lw %[r0], 0(%[matrix1_ptr]) \n\t"
+    "lw %[r1], 4(%[matrix1_ptr]) \n\t"
+    "lh %[r2], 0(%[matrix0_ptr]) \n\t"
+    "andi %[r3], %[r0], 0xffff \n\t"  // r3 = low 16 bits of r0.
+    "sra %[r3], %[r3], 1 \n\t"
+    "mul %[r3], %[r3], %[r2] \n\t"
+    "andi %[r4], %[r1], 0xffff \n\t"  // r4 = low 16 bits of r1.
+    "sra %[r4], %[r4], 1 \n\t"
+    "mul %[r4], %[r4], %[r2] \n\t"
+    "sra %[r0], %[r0], 16 \n\t"
+    "mul %[r0], %[r0], %[r2] \n\t"  // r0 = (r0 >> 16) * matrix0 value.
+    "sra %[r1], %[r1], 16 \n\t"
+    "mul %[r1], %[r1], %[r2] \n\t"  // r1 = (r1 >> 16) * matrix0 value.
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r3], %[r3], 15 \n\t"  // Rounding arithmetic shift (DSP ASE).
+    "shra_r.w %[r4], %[r4], 15 \n\t"
+#else
+    "addiu %[r3], %[r3], 0x4000 \n\t"  // Round, then shift right by 15.
+    "sra %[r3], %[r3], 15 \n\t"
+    "addiu %[r4], %[r4], 0x4000 \n\t"
+    "sra %[r4], %[r4], 15 \n\t"
+#endif
+    "addiu %[matrix1_ptr], %[matrix1_ptr], 8 \n\t"  // Skip the pair just read.
+    "addu %[matrix0_ptr], %[matrix0_ptr], %[matrix0_step] \n\t"
+    "addiu %[n], %[n], -1 \n\t"
+    "addu %[sum32], %[sum32], %[r3] \n\t"
+    "addu %[sum32_2], %[sum32_2], %[r4] \n\t"
+    "addu %[sum32], %[sum32], %[r0] \n\t"
+    "bgtz %[n], 2b \n\t"
+    " addu %[sum32_2], %[sum32_2], %[r1] \n\t"
+    "sra %[sum32], %[sum32], 3 \n\t"  // Both sums are scaled by >> 3.
+    "sra %[sum32_2], %[sum32_2], 3 \n\t"
+    "addiu %[j], %[j], -1 \n\t"
+    "addu %[matrix0_start], %[matrix0_start], %[matrix0_step2] \n\t"
+    "sw %[sum32], 0(%[matrix_product]) \n\t"
+    "sw %[sum32_2], 4(%[matrix_product]) \n\t"
+    "bgtz %[j], 1b \n\t"
+    " addiu %[matrix_product], %[matrix_product], 8 \n\t"
+    ".set pop \n\t"
+    : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start),
+      [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr),
+      [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2),
+      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+      [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product)
+    : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0),
+      [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step),
+      [matrix0_step2] "r" (matrix0_step2)
+    : "hi", "lo", "memory"
+  );
+}
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
index 887a7ba2c..3be2b151f 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
@@ -211,6 +211,8 @@ static void WebRtcIsacfix_InitMIPS(void) {
   WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopMIPS;
   WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeMIPS;
   WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecMIPS;
+  WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1MIPS;
+  WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2MIPS;
 #if defined(MIPS_DSP_R1_LE)
   WebRtcIsacfix_AllpassFilter2FixDec16 =
       WebRtcIsacfix_AllpassFilter2FixDec16MIPS;
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
index e5aade657..7010b1ee4 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
@@ -89,6 +89,7 @@
         }],
         ['target_arch=="mipsel"', {
           'sources': [
+            'entropy_coding_mips.c',
             'filters_mips.c',
             'lattice_mips.c',
             'pitch_estimator_mips.c',