MIPS optimizations for ISAC (patch #3)
Implemented functions: - WebRtcIsacfix_MatrixProduct1 - WebRtcIsacfix_MatrixProduct2 The optimizations are bit-exact to the C code. R=andrew@webrtc.org, tina.legrand@webrtc.org Review URL: https://webrtc-codereview.appspot.com/18019004 Patch from Ljubomir Papuga <lpapuga@mips.com>. git-svn-id: http://webrtc.googlecode.com/svn/trunk@6919 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -166,4 +166,24 @@ void WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[], | ||||
|                                       const int matrix0_index_step); | ||||
| #endif | ||||
|  | ||||
| #if defined(MIPS32_LE) | ||||
| void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[], | ||||
|                                       const int32_t matrix1[], | ||||
|                                       int32_t matrix_product[], | ||||
|                                       const int matrix1_index_factor1, | ||||
|                                       const int matrix0_index_factor1, | ||||
|                                       const int matrix1_index_init_case, | ||||
|                                       const int matrix1_index_step, | ||||
|                                       const int matrix0_index_step, | ||||
|                                       const int inner_loop_count, | ||||
|                                       const int mid_loop_count, | ||||
|                                       const int shift); | ||||
|  | ||||
| void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[], | ||||
|                                       const int32_t matrix1[], | ||||
|                                       int32_t matrix_product[], | ||||
|                                       const int matrix0_index_factor, | ||||
|                                       const int matrix0_index_step); | ||||
| #endif | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_ENTROPY_CODING_H_ | ||||
|   | ||||
| @@ -0,0 +1,249 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "modules/audio_coding/codecs/isac/fix/source/entropy_coding.h" | ||||
| #include "modules/audio_coding/codecs/isac/fix/source/settings.h" | ||||
|  | ||||
| // MIPS optimization of the function WebRtcIsacfix_MatrixProduct1. | ||||
| // Bit-exact with the function WebRtcIsacfix_MatrixProduct1C from | ||||
| // entropy_coding.c file. | ||||
| void WebRtcIsacfix_MatrixProduct1MIPS(const int16_t matrix0[], | ||||
|                                       const int32_t matrix1[], | ||||
|                                       int32_t matrix_product[], | ||||
|                                       const int matrix1_index_factor1, | ||||
|                                       const int matrix0_index_factor1, | ||||
|                                       const int matrix1_index_init_case, | ||||
|                                       const int matrix1_index_step, | ||||
|                                       const int matrix0_index_step, | ||||
|                                       const int inner_loop_count, | ||||
|                                       const int mid_loop_count, | ||||
|                                       const int shift) { | ||||
|   if (matrix1_index_init_case != 0) { | ||||
|     int j = SUBFRAMES, k = 0, n = 0; | ||||
|     int32_t r0, r1, r2, sum32; | ||||
|     int32_t* product_start = matrix_product; | ||||
|     int32_t* product_ptr; | ||||
|     const uint32_t product_step = 4 * mid_loop_count; | ||||
|     const uint32_t matrix0_step = 2 * matrix0_index_step; | ||||
|     const uint32_t matrix1_step = 4 * matrix1_index_step; | ||||
|     const uint32_t matrix0_step2 = 2 * matrix0_index_factor1; | ||||
|     const uint32_t matrix1_step2 = 4 * matrix1_index_factor1; | ||||
|     const int16_t* matrix0_start = matrix0; | ||||
|     const int32_t* matrix1_start = matrix1; | ||||
|     int16_t* matrix0_ptr; | ||||
|     int32_t* matrix1_ptr; | ||||
|  | ||||
|     __asm __volatile ( | ||||
|       ".set     push                                                       \n\t" | ||||
|       ".set     noreorder                                                  \n\t" | ||||
|      "1:                                                                   \n\t" | ||||
|       "addu     %[product_ptr],     %[product_start],     $0               \n\t" | ||||
|       "addu     %[k],               %[product_step],      $0               \n\t" | ||||
|       "addiu    %[j],               %[j],                 -1               \n\t" | ||||
|       "addu     %[matrix1_start],   %[matrix1],           $0               \n\t" | ||||
|      "2:                                                                   \n\t" | ||||
|       "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t" | ||||
|       "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t" | ||||
|       "addu     %[n],               %[inner_loop_count],  $0               \n\t" | ||||
|       "mul      %[sum32],           $0,                   $0               \n\t" | ||||
|      "3:                                                                   \n\t" | ||||
|       "lw       %[r0],              0(%[matrix1_ptr])                      \n\t" | ||||
|       "lh       %[r1],              0(%[matrix0_ptr])                      \n\t" | ||||
|       "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t" | ||||
|       "sllv     %[r0],              %[r0],                %[shift]         \n\t" | ||||
|       "andi     %[r2],              %[r0],                0xffff           \n\t" | ||||
|       "sra      %[r2],              %[r2],                1                \n\t" | ||||
|       "mul      %[r2],              %[r2],                %[r1]            \n\t" | ||||
|       "sra      %[r0],              %[r0],                16               \n\t" | ||||
|       "mul      %[r0],              %[r0],                %[r1]            \n\t" | ||||
|       "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t" | ||||
|       "addiu    %[n],               %[n],                 -1               \n\t" | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
|       "shra_r.w %[r2],              %[r2],                15               \n\t" | ||||
| #else | ||||
|       "addiu    %[r2],              %[r2],                0x4000           \n\t" | ||||
|       "sra      %[r2],              %[r2],                15               \n\t" | ||||
| #endif | ||||
|       "addu     %[sum32],           %[sum32],             %[r2]            \n\t" | ||||
|       "bgtz     %[n],               3b                                     \n\t" | ||||
|       " addu    %[sum32],           %[sum32],             %[r0]            \n\t" | ||||
|       "addiu    %[k],               %[k],                 -4               \n\t" | ||||
|       "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t" | ||||
|       "sw       %[sum32],           0(%[product_ptr])                      \n\t" | ||||
|       "bgtz     %[k],               2b                                     \n\t" | ||||
|       " addiu   %[product_ptr],     %[product_ptr],       4                \n\t" | ||||
|       "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t" | ||||
|       "bgtz     %[j],               1b                                     \n\t" | ||||
|       " addu    %[product_start],   %[product_start],     %[product_step]  \n\t" | ||||
|       ".set     pop                                                        \n\t" | ||||
|       : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start), | ||||
|         [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "=&r"(matrix1_start), | ||||
|         [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr), | ||||
|         [matrix0_start] "+r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0), | ||||
|         [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2) | ||||
|       : [product_step] "r" (product_step), [matrix1] "r" (matrix1), | ||||
|         [inner_loop_count] "r" (inner_loop_count), | ||||
|         [matrix1_step] "r" (matrix1_step), [shift] "r" (shift), | ||||
|         [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2), | ||||
|         [matrix0_step2] "r" (matrix0_step2) | ||||
|       : "hi", "lo", "memory" | ||||
|     ); | ||||
|   } else { | ||||
|     int j = SUBFRAMES, k = 0, n = 0; | ||||
|     int32_t r0, r1, r2, sum32; | ||||
|     int32_t* product_start = matrix_product; | ||||
|     int32_t* product_ptr; | ||||
|     const uint32_t product_step = 4 * mid_loop_count; | ||||
|     const uint32_t matrix0_step = 2 * matrix0_index_step; | ||||
|     const uint32_t matrix1_step = 4 * matrix1_index_step; | ||||
|     const uint32_t matrix0_step2 = 2 * matrix0_index_factor1; | ||||
|     const uint32_t matrix1_step2 = 4 * matrix1_index_factor1; | ||||
|     const int16_t* matrix0_start = matrix0; | ||||
|     const int32_t* matrix1_start = matrix1; | ||||
|     int16_t* matrix0_ptr; | ||||
|     int32_t* matrix1_ptr; | ||||
|  | ||||
|     __asm __volatile ( | ||||
|       ".set     push                                                       \n\t" | ||||
|       ".set     noreorder                                                  \n\t" | ||||
|      "1:                                                                   \n\t" | ||||
|       "addu     %[product_ptr],     %[product_start],     $0               \n\t" | ||||
|       "addu     %[k],               %[product_step],      $0               \n\t" | ||||
|       "addiu    %[j],               %[j],                 -1               \n\t" | ||||
|       "addu     %[matrix0_start],   %[matrix0],           $0               \n\t" | ||||
|      "2:                                                                   \n\t" | ||||
|       "addu     %[matrix1_ptr],     %[matrix1_start],     $0               \n\t" | ||||
|       "addu     %[matrix0_ptr],     %[matrix0_start],     $0               \n\t" | ||||
|       "addu     %[n],               %[inner_loop_count],  $0               \n\t" | ||||
|       "mul      %[sum32],           $0,                   $0               \n\t" | ||||
|      "3:                                                                   \n\t" | ||||
|       "lw       %[r0],              0(%[matrix1_ptr])                      \n\t" | ||||
|       "lh       %[r1],              0(%[matrix0_ptr])                      \n\t" | ||||
|       "addu     %[matrix1_ptr],     %[matrix1_ptr],       %[matrix1_step]  \n\t" | ||||
|       "sllv     %[r0],              %[r0],                %[shift]         \n\t" | ||||
|       "andi     %[r2],              %[r0],                0xffff           \n\t" | ||||
|       "sra      %[r2],              %[r2],                1                \n\t" | ||||
|       "mul      %[r2],              %[r2],                %[r1]            \n\t" | ||||
|       "sra      %[r0],              %[r0],                16               \n\t" | ||||
|       "mul      %[r0],              %[r0],                %[r1]            \n\t" | ||||
|       "addu     %[matrix0_ptr],     %[matrix0_ptr],       %[matrix0_step]  \n\t" | ||||
|       "addiu    %[n],               %[n],                 -1               \n\t" | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
|       "shra_r.w %[r2],              %[r2],                15               \n\t" | ||||
| #else | ||||
|       "addiu    %[r2],              %[r2],                0x4000           \n\t" | ||||
|       "sra      %[r2],              %[r2],                15               \n\t" | ||||
| #endif | ||||
|       "addu     %[sum32],           %[sum32],             %[r2]            \n\t" | ||||
|       "bgtz     %[n],               3b                                     \n\t" | ||||
|       " addu    %[sum32],           %[sum32],             %[r0]            \n\t" | ||||
|       "addiu    %[k],               %[k],                 -4               \n\t" | ||||
|       "addu     %[matrix0_start],   %[matrix0_start],     %[matrix0_step2] \n\t" | ||||
|       "sw       %[sum32],           0(%[product_ptr])                      \n\t" | ||||
|       "bgtz     %[k],               2b                                     \n\t" | ||||
|       " addiu   %[product_ptr],     %[product_ptr],       4                \n\t" | ||||
|       "addu     %[matrix1_start],   %[matrix1_start],     %[matrix1_step2] \n\t" | ||||
|       "bgtz     %[j],               1b                                     \n\t" | ||||
|       " addu    %[product_start],   %[product_start],     %[product_step]  \n\t" | ||||
|       ".set     pop                                                        \n\t" | ||||
|       : [product_ptr] "=&r" (product_ptr), [product_start] "+r" (product_start), | ||||
|         [k] "=&r" (k), [j] "+r" (j), [matrix1_start] "+r"(matrix1_start), | ||||
|         [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr), | ||||
|         [matrix0_start] "=&r" (matrix0_start), [n] "=&r" (n), [r0] "=&r" (r0), | ||||
|         [sum32] "=&r" (sum32), [r1] "=&r" (r1),[r2] "=&r" (r2) | ||||
|       : [product_step] "r" (product_step), [matrix0] "r" (matrix0), | ||||
|         [inner_loop_count] "r" (inner_loop_count), | ||||
|         [matrix1_step] "r" (matrix1_step), [shift] "r" (shift), | ||||
|         [matrix0_step] "r" (matrix0_step), [matrix1_step2] "r" (matrix1_step2), | ||||
|         [matrix0_step2] "r" (matrix0_step2) | ||||
|       : "hi", "lo", "memory" | ||||
|     ); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // MIPS optimization of the function WebRtcIsacfix_MatrixProduct2. | ||||
| // Bit-exact with the function WebRtcIsacfix_MatrixProduct2C from | ||||
| // entropy_coding.c file. | ||||
| void WebRtcIsacfix_MatrixProduct2MIPS(const int16_t matrix0[], | ||||
|                                       const int32_t matrix1[], | ||||
|                                       int32_t matrix_product[], | ||||
|                                       const int matrix0_index_factor, | ||||
|                                       const int matrix0_index_step) { | ||||
|   int j = 0, n = 0; | ||||
|   int loop_count = SUBFRAMES; | ||||
|   const int16_t* matrix0_ptr; | ||||
|   const int32_t* matrix1_ptr; | ||||
|   const int16_t* matrix0_start = matrix0; | ||||
|   const int matrix0_step = 2 * matrix0_index_step; | ||||
|   const int matrix0_step2 = 2 * matrix0_index_factor; | ||||
|   int32_t r0, r1, r2, r3, r4, sum32, sum32_2; | ||||
|  | ||||
|   __asm __volatile ( | ||||
|     ".set       push                                                   \n\t" | ||||
|     ".set       noreorder                                              \n\t" | ||||
|     "addu       %[j],              %[loop_count],     $0               \n\t" | ||||
|     "addu       %[matrix0_start],  %[matrix0],        $0               \n\t" | ||||
|    "1:                                                                 \n\t" | ||||
|     "addu       %[matrix1_ptr],    %[matrix1],        $0               \n\t" | ||||
|     "addu       %[matrix0_ptr],    %[matrix0_start],  $0               \n\t" | ||||
|     "addu       %[n],              %[loop_count],     $0               \n\t" | ||||
|     "mul        %[sum32],          $0,                $0               \n\t" | ||||
|     "mul        %[sum32_2],        $0,                $0               \n\t" | ||||
|    "2:                                                                 \n\t" | ||||
|     "lw         %[r0],             0(%[matrix1_ptr])                   \n\t" | ||||
|     "lw         %[r1],             4(%[matrix1_ptr])                   \n\t" | ||||
|     "lh         %[r2],             0(%[matrix0_ptr])                   \n\t" | ||||
|     "andi       %[r3],             %[r0],             0xffff           \n\t" | ||||
|     "sra        %[r3],             %[r3],             1                \n\t" | ||||
|     "mul        %[r3],             %[r3],             %[r2]            \n\t" | ||||
|     "andi       %[r4],             %[r1],             0xffff           \n\t" | ||||
|     "sra        %[r4],             %[r4],             1                \n\t" | ||||
|     "mul        %[r4],             %[r4],             %[r2]            \n\t" | ||||
|     "sra        %[r0],             %[r0],             16               \n\t" | ||||
|     "mul        %[r0],             %[r0],             %[r2]            \n\t" | ||||
|     "sra        %[r1],             %[r1],             16               \n\t" | ||||
|     "mul        %[r1],             %[r1],             %[r2]            \n\t" | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
|     "shra_r.w   %[r3],             %[r3],             15               \n\t" | ||||
|     "shra_r.w   %[r4],             %[r4],             15               \n\t" | ||||
| #else | ||||
|     "addiu      %[r3],             %[r3],             0x4000           \n\t" | ||||
|     "sra        %[r3],             %[r3],             15               \n\t" | ||||
|     "addiu      %[r4],             %[r4],             0x4000           \n\t" | ||||
|     "sra        %[r4],             %[r4],             15               \n\t" | ||||
| #endif | ||||
|     "addiu      %[matrix1_ptr],    %[matrix1_ptr],    8                \n\t" | ||||
|     "addu       %[matrix0_ptr],    %[matrix0_ptr],    %[matrix0_step]  \n\t" | ||||
|     "addiu      %[n],              %[n],              -1               \n\t" | ||||
|     "addu       %[sum32],          %[sum32],          %[r3]            \n\t" | ||||
|     "addu       %[sum32_2],        %[sum32_2],        %[r4]            \n\t" | ||||
|     "addu       %[sum32],          %[sum32],          %[r0]            \n\t" | ||||
|     "bgtz       %[n],              2b                                  \n\t" | ||||
|     " addu      %[sum32_2],        %[sum32_2],        %[r1]            \n\t" | ||||
|     "sra        %[sum32],          %[sum32],          3                \n\t" | ||||
|     "sra        %[sum32_2],        %[sum32_2],        3                \n\t" | ||||
|     "addiu      %[j],              %[j],              -1               \n\t" | ||||
|     "addu       %[matrix0_start],  %[matrix0_start],  %[matrix0_step2] \n\t" | ||||
|     "sw         %[sum32],          0(%[matrix_product])                \n\t" | ||||
|     "sw         %[sum32_2],        4(%[matrix_product])                \n\t" | ||||
|     "bgtz       %[j],              1b                                  \n\t" | ||||
|     " addiu     %[matrix_product], %[matrix_product], 8                \n\t" | ||||
|     ".set       pop                                                    \n\t" | ||||
|     : [j] "=&r" (j), [matrix0_start] "=&r" (matrix0_start), | ||||
|       [matrix1_ptr] "=&r" (matrix1_ptr), [matrix0_ptr] "=&r" (matrix0_ptr), | ||||
|       [n] "=&r" (n), [sum32] "=&r" (sum32), [sum32_2] "=&r" (sum32_2), | ||||
|       [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), | ||||
|       [r4] "=&r" (r4), [matrix_product] "+r" (matrix_product) | ||||
|     : [loop_count] "r" (loop_count), [matrix0] "r" (matrix0), | ||||
|       [matrix1] "r" (matrix1), [matrix0_step] "r" (matrix0_step), | ||||
|       [matrix0_step2] "r" (matrix0_step2) | ||||
|     : "hi", "lo", "memory" | ||||
|   ); | ||||
| } | ||||
| @@ -211,6 +211,8 @@ static void WebRtcIsacfix_InitMIPS(void) { | ||||
|   WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopMIPS; | ||||
|   WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeMIPS; | ||||
|   WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecMIPS; | ||||
|   WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1MIPS; | ||||
|   WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2MIPS; | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
|   WebRtcIsacfix_AllpassFilter2FixDec16 = | ||||
|       WebRtcIsacfix_AllpassFilter2FixDec16MIPS; | ||||
|   | ||||
| @@ -89,6 +89,7 @@ | ||||
|         }], | ||||
|         ['target_arch=="mipsel"', { | ||||
|           'sources': [ | ||||
|             'entropy_coding_mips.c', | ||||
|             'filters_mips.c', | ||||
|             'lattice_mips.c', | ||||
|             'pitch_estimator_mips.c', | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 andrew@webrtc.org
					andrew@webrtc.org