MIPS optimizations for Signal Processing Library patch01
Review URL: https://webrtc-codereview.appspot.com/1028004 Patch from Ljubomir Papuga <lpapuga@mips.com>. git-svn-id: http://webrtc.googlecode.com/svn/trunk@3557 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		
							
								
								
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -5,8 +5,8 @@ Google Inc. | ||||
| Mozilla Foundation | ||||
| Intel Corporation | ||||
| Vonage Holdings Corp. | ||||
| MIPS Technologies | ||||
| Ben Strong <bstrong@gmail.com> | ||||
| Petar Jovanovic <petarj@mips.com> | ||||
| Martin Storsjo <martin@martin.st> | ||||
| Jie Mao <maojie0924@gmail.com> | ||||
| Anil Kumar <an1kumar@gmail.com> | ||||
|   | ||||
| @@ -73,6 +73,12 @@ | ||||
|  | ||||
|     'libyuv_dir%': '<(DEPTH)/third_party/libyuv', | ||||
|  | ||||
|     # Define MIPS architecture variant, MIPS DSP variant and MIPS FPU | ||||
|     # These defaults may change to stay in accordance with Chromium's MIPS flags. | ||||
|     'mips_arch_variant%': 'mips32r1', | ||||
|     'mips_dsp_rev%': 0, | ||||
|     'mips_fpu%' : 1, | ||||
|  | ||||
|     'conditions': [ | ||||
|       ['build_with_chromium==1', { | ||||
|         # Exclude pulse audio on Chromium since its prerequisites don't require | ||||
| @@ -199,6 +205,59 @@ | ||||
|           }], | ||||
|         ], | ||||
|       }], | ||||
|       ['target_arch=="mipsel"', { | ||||
|         'defines': [ | ||||
|           'MIPS32_LE', | ||||
|         ], | ||||
|         'conditions': [ | ||||
|           ['mips_fpu==1', { | ||||
|             'defines': [ | ||||
|               'MIPS_FPU_LE', | ||||
|             ], | ||||
|             'cflags': [ | ||||
|               '-mhard-float', | ||||
|             ], | ||||
|           }, { | ||||
|             'cflags': [ | ||||
|               '-msoft-float', | ||||
|             ], | ||||
|           }], | ||||
|           ['mips_arch_variant=="mips32r2"', { | ||||
|             'defines': [ | ||||
|               'MIPS32_R2_LE', | ||||
|             ], | ||||
|             'cflags': [ | ||||
|               '-mips32r2', | ||||
|             ], | ||||
|             'cflags_cc': [ | ||||
|               '-mips32r2', | ||||
|             ], | ||||
|           }], | ||||
|           ['mips_dsp_rev==1', { | ||||
|             'defines': [ | ||||
|               'MIPS_DSP_R1_LE', | ||||
|             ], | ||||
|             'cflags': [ | ||||
|               '-mdsp', | ||||
|             ], | ||||
|             'cflags_cc': [ | ||||
|               '-mdsp', | ||||
|             ], | ||||
|           }], | ||||
|           ['mips_dsp_rev==2', { | ||||
|             'defines': [ | ||||
|               'MIPS_DSP_R1_LE', | ||||
|               'MIPS_DSP_R2_LE', | ||||
|             ], | ||||
|             'cflags': [ | ||||
|               '-mdspr2', | ||||
|             ], | ||||
|             'cflags_cc': [ | ||||
|               '-mdspr2', | ||||
|             ], | ||||
|           }], | ||||
|         ], | ||||
|       }], | ||||
|       ['OS=="ios"', { | ||||
|         'defines': [ | ||||
|           'WEBRTC_MAC', | ||||
|   | ||||
| @@ -147,8 +147,7 @@ | ||||
|     ((WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT((a), 18816, 7) & 0x00007fff)) | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" | ||||
| { | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define WEBRTC_SPL_MEMCPY_W8(v1, v2, length) \ | ||||
| @@ -223,6 +222,9 @@ int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length); | ||||
| #endif | ||||
| #if defined(MIPS32_LE) | ||||
| int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, int length); | ||||
| #endif | ||||
|  | ||||
| // Returns the largest absolute value in a signed 32-bit vector. | ||||
| // | ||||
| @@ -238,6 +240,9 @@ int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length); | ||||
| #endif | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
| int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, int length); | ||||
| #endif | ||||
|  | ||||
| // Returns the maximum value of a 16-bit vector. | ||||
| // | ||||
| @@ -255,6 +260,9 @@ int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length); | ||||
| #endif | ||||
| #if defined(MIPS32_LE) | ||||
| int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, int length); | ||||
| #endif | ||||
|  | ||||
| // Returns the maximum value of a 32-bit vector. | ||||
| // | ||||
| @@ -272,6 +280,9 @@ int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length); | ||||
| #endif | ||||
| #if defined(MIPS32_LE) | ||||
| int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, int length); | ||||
| #endif | ||||
|  | ||||
| // Returns the minimum value of a 16-bit vector. | ||||
| // | ||||
| @@ -289,6 +300,9 @@ int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length); | ||||
| #endif | ||||
| #if defined(MIPS32_LE) | ||||
| int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, int length); | ||||
| #endif | ||||
|  | ||||
| // Returns the minimum value of a 32-bit vector. | ||||
| // | ||||
| @@ -306,6 +320,9 @@ int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length); | ||||
| #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) | ||||
| int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length); | ||||
| #endif | ||||
| #if defined(MIPS32_LE) | ||||
| int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, int length); | ||||
| #endif | ||||
|  | ||||
| // Returns the vector index to the largest absolute value of a 16-bit vector. | ||||
| // | ||||
|   | ||||
							
								
								
									
										386
									
								
								webrtc/common_audio/signal_processing/min_max_operations_mips.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										386
									
								
								webrtc/common_audio/signal_processing/min_max_operations_mips.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,386 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| /* | ||||
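|  * This file contains the MIPS implementations of the functions | ||||
|  * WebRtcSpl_MaxAbsValueW16_mips(), WebRtcSpl_MaxAbsValueW32_mips(), | ||||
|  * WebRtcSpl_MaxValueW16_mips(), WebRtcSpl_MaxValueW32_mips(), | ||||
|  * WebRtcSpl_MinValueW16_mips() and WebRtcSpl_MinValueW32_mips(). | ||||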
|  * | ||||
|  * The description header can be found in signal_processing_library.h. | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| #include "signal_processing_library.h" | ||||
|  | ||||
| // Maximum absolute value of word16 vector. Version for MIPS platform. | ||||
| // | ||||
| // Input: | ||||
| //      - vector : 16-bit input vector. | ||||
| //      - length : Number of samples in vector. | ||||
| // | ||||
| // Return value  : Largest absolute value in vector, capped at | ||||
| //                 WEBRTC_SPL_WORD16_MAX; -1 if vector is NULL or length <= 0. | ||||
| int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, int length) { | ||||
|   WebRtc_Word32 totMax = 0; | ||||
|   WebRtc_Word32 tmp32_0, tmp32_1, tmp32_2, tmp32_3; | ||||
|   int i, loop_size; | ||||
|  | ||||
|   if (vector == NULL || length <= 0) { | ||||
|     return -1; | ||||
|   } | ||||
| // The build defines MIPS_DSP_R1_LE (not MIPS_DSP_R1) when the DSP ASE is | ||||
| // enabled, and the rest of this file tests the same macro. | ||||
| // NOTE(review): this path reads the int16_t vector with word loads (lw) and | ||||
| // uses rotr; confirm callers pass 4-byte-aligned vectors and that DSP builds | ||||
| // always target MIPS32R2. | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
|   const WebRtc_Word32* tmpvec32 = (WebRtc_Word32*)vector; | ||||
|   loop_size = length >> 4; | ||||
|  | ||||
|   // Main loop: 16 samples per iteration, loaded as eight packed halfword | ||||
|   // pairs. absq_s.ph takes the saturating absolute value of both halfwords; | ||||
|   // cmp.lt.ph/pick.ph keep the per-lane running maximum in totMax. | ||||
|   for (i = 0; i < loop_size; i++) { | ||||
|     __asm__ volatile ( | ||||
|       "lw         %[tmp32_0],     0(%[tmpvec32])              \n\t" | ||||
|       "lw         %[tmp32_1],     4(%[tmpvec32])              \n\t" | ||||
|       "lw         %[tmp32_2],     8(%[tmpvec32])              \n\t" | ||||
|       "lw         %[tmp32_3],     12(%[tmpvec32])             \n\t" | ||||
|  | ||||
|       "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t" | ||||
|       "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t" | ||||
|  | ||||
|       "lw         %[tmp32_0],     16(%[tmpvec32])             \n\t" | ||||
|       "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t" | ||||
|  | ||||
|       "lw         %[tmp32_1],     20(%[tmpvec32])             \n\t" | ||||
|       "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t" | ||||
|  | ||||
|       "lw         %[tmp32_2],     24(%[tmpvec32])             \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t" | ||||
|  | ||||
|       "lw         %[tmp32_3],     28(%[tmpvec32])             \n\t" | ||||
|       "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t" | ||||
|       "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t" | ||||
|  | ||||
|       "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t" | ||||
|       "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t" | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t" | ||||
|  | ||||
|       "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t" | ||||
|       "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t" | ||||
|  | ||||
|       "addiu      %[tmpvec32],    %[tmpvec32],    32          \n\t" | ||||
|       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), | ||||
|         [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), | ||||
|         [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32) | ||||
|       : | ||||
|       : "memory" | ||||
|     ); | ||||
|   } | ||||
|   // Fold the two halfword lanes into one scalar: rotr swaps the halves, | ||||
|   // cmp/pick leave the larger lane in both halves, and packrl.ph moves the | ||||
|   // upper halfword into the low halfword with a zero upper half. | ||||
|   __asm__ volatile ( | ||||
|     "rotr       %[tmp32_0],     %[totMax],      16          \n\t" | ||||
|     "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t" | ||||
|     "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t" | ||||
|     "packrl.ph  %[totMax],      $0,             %[totMax]   \n\t" | ||||
|     : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax) | ||||
|     : | ||||
|   ); | ||||
|   // Tail: the remaining (length & 0xf) samples, one halfword at a time. | ||||
|   loop_size = length & 0xf; | ||||
|   for (i = 0; i < loop_size; i++) { | ||||
|     __asm__ volatile ( | ||||
|       "lh         %[tmp32_0],     0(%[tmpvec32])              \n\t" | ||||
|       "addiu      %[tmpvec32],    %[tmpvec32],     2          \n\t" | ||||
|       "absq_s.w   %[tmp32_0],     %[tmp32_0]                  \n\t" | ||||
|       "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t" | ||||
|       "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t" | ||||
|       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), | ||||
|         [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax) | ||||
|       : | ||||
|       : "memory" | ||||
|     ); | ||||
|   } | ||||
| #else  // #if defined(MIPS_DSP_R1_LE) | ||||
|   WebRtc_Word32 r, r1, r2, r3; | ||||
|   const WebRtc_Word16* tmpvector = vector; | ||||
|   // Main loop: 16 samples per iteration, four at a time through | ||||
|   // load / abs / compare (slt) / conditional move (movn). | ||||
|   loop_size = length >> 4; | ||||
|   for (i = 0; i < loop_size; i++) { | ||||
|     __asm__ volatile ( | ||||
|       "lh     %[tmp32_0],     0(%[tmpvector])                 \n\t" | ||||
|       "lh     %[tmp32_1],     2(%[tmpvector])                 \n\t" | ||||
|       "lh     %[tmp32_2],     4(%[tmpvector])                 \n\t" | ||||
|       "lh     %[tmp32_3],     6(%[tmpvector])                 \n\t" | ||||
|  | ||||
|       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t" | ||||
|       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t" | ||||
|       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t" | ||||
|       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t" | ||||
|  | ||||
|       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t" | ||||
|       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t" | ||||
|       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t" | ||||
|       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t" | ||||
|  | ||||
|       "lh     %[tmp32_0],     8(%[tmpvector])                 \n\t" | ||||
|       "lh     %[tmp32_1],     10(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_2],     12(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_3],     14(%[tmpvector])                \n\t" | ||||
|  | ||||
|       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t" | ||||
|       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t" | ||||
|       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t" | ||||
|       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t" | ||||
|  | ||||
|       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t" | ||||
|       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t" | ||||
|       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t" | ||||
|       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t" | ||||
|  | ||||
|       "lh     %[tmp32_0],     16(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_1],     18(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_2],     20(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_3],     22(%[tmpvector])                \n\t" | ||||
|  | ||||
|       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t" | ||||
|       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t" | ||||
|       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t" | ||||
|       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t" | ||||
|  | ||||
|       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t" | ||||
|       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t" | ||||
|       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t" | ||||
|       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t" | ||||
|  | ||||
|       "lh     %[tmp32_0],     24(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_1],     26(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_2],     28(%[tmpvector])                \n\t" | ||||
|       "lh     %[tmp32_3],     30(%[tmpvector])                \n\t" | ||||
|  | ||||
|       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t" | ||||
|       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t" | ||||
|       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t" | ||||
|       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t" | ||||
|  | ||||
|       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t" | ||||
|       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t" | ||||
|       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t" | ||||
|       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t" | ||||
|       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t" | ||||
|  | ||||
|       "addiu  %[tmpvector],   %[tmpvector],   32              \n\t" | ||||
|       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), | ||||
|         [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), | ||||
|         [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector), | ||||
|         [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3) | ||||
|       : | ||||
|       : "memory" | ||||
|     ); | ||||
|   } | ||||
|   // Tail: the remaining (length & 0xf) samples, one halfword at a time. | ||||
|   loop_size = length & 0xf; | ||||
|   for (i = 0; i < loop_size; i++) { | ||||
|     __asm__ volatile ( | ||||
|       "lh         %[tmp32_0],     0(%[tmpvector])             \n\t" | ||||
|       "addiu      %[tmpvector],   %[tmpvector],    2          \n\t" | ||||
|       "abs        %[tmp32_0],     %[tmp32_0]                  \n\t" | ||||
|       "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t" | ||||
|       "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t" | ||||
|       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), | ||||
|         [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax) | ||||
|       : | ||||
|       : "memory" | ||||
|     ); | ||||
|   } | ||||
| #endif  // #if defined(MIPS_DSP_R1_LE) | ||||
|  | ||||
|   // A sample of -32768 handled with 32-bit abs/absq_s.w yields 32768, which | ||||
|   // does not fit in int16_t. Clamp in both code paths before narrowing. | ||||
|   if (totMax > WEBRTC_SPL_WORD16_MAX) { | ||||
|     totMax = WEBRTC_SPL_WORD16_MAX; | ||||
|   } | ||||
|   return (int16_t)totMax; | ||||
| } | ||||
|  | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
| // Maximum absolute value of word32 vector. Version for MIPS platform. | ||||
| // | ||||
| // Input: | ||||
| //      - vector : 32-bit input vector. | ||||
| //      - length : Number of samples in vector. | ||||
| // | ||||
| // Return value  : Largest absolute value in vector, capped at 0x7fffffff; | ||||
| //                 -1 if vector is NULL or length <= 0. | ||||
| int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, int length) { | ||||
|   // absq_s.w is the saturating absolute value of the DSP ASE, so | ||||
|   // abs(0x80000000) comes out as 0x7fffffff. The closing slt/movn pair | ||||
|   // additionally caps the result at max_value. | ||||
|  | ||||
|   uint32_t absolute = 0, maximum = 0; | ||||
|   int tmp1 = 0, max_value = 0x7fffffff; | ||||
|  | ||||
|   if (vector == NULL || length <= 0) { | ||||
|     return -1; | ||||
|   } | ||||
|  | ||||
|   // The loop counts length down to zero; the addiu after bgtz sits in the | ||||
|   // branch delay slot (noreorder) and advances vector on every iteration. | ||||
|   // vector and length are modified by the asm, so they are declared as | ||||
|   // read-write ("+r") operands rather than inputs. | ||||
|   __asm__ volatile ( | ||||
|     ".set push                                                        \n\t" | ||||
|     ".set noreorder                                                   \n\t" | ||||
|  | ||||
|    "1:                                                                \n\t" | ||||
|     "lw         %[absolute],      0(%[vector])                        \n\t" | ||||
|     "absq_s.w   %[absolute],      %[absolute]                         \n\t" | ||||
|     "addiu      %[length],        %[length],          -1              \n\t" | ||||
|     "slt        %[tmp1],          %[maximum],         %[absolute]     \n\t" | ||||
|     "movn       %[maximum],       %[absolute],        %[tmp1]         \n\t" | ||||
|     "bgtz       %[length],        1b                                  \n\t" | ||||
|     " addiu     %[vector],        %[vector],          4               \n\t" | ||||
|     "slt        %[tmp1],          %[max_value],       %[maximum]      \n\t" | ||||
|     "movn       %[maximum],       %[max_value],       %[tmp1]         \n\t" | ||||
|  | ||||
|     ".set pop                                                         \n\t" | ||||
|  | ||||
|     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute), | ||||
|       [vector] "+r" (vector), [length] "+r" (length) | ||||
|     : [max_value] "r" (max_value) | ||||
|     : "memory" | ||||
|   ); | ||||
|  | ||||
|   return (int32_t)maximum; | ||||
| } | ||||
| #endif  // #if defined(MIPS_DSP_R1_LE) | ||||
|  | ||||
| // Maximum value of word16 vector. Version for MIPS platform. | ||||
| // | ||||
| // Input: | ||||
| //      - vector : 16-bit input vector. | ||||
| //      - length : Number of samples in vector. | ||||
| // | ||||
| // Return value  : Largest value in vector; WEBRTC_SPL_WORD16_MIN if vector | ||||
| //                 is NULL or length <= 0. | ||||
| int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, int length) { | ||||
|   int16_t maximum = WEBRTC_SPL_WORD16_MIN; | ||||
|   int tmp1; | ||||
|   int16_t value; | ||||
|  | ||||
|   if (vector == NULL || length <= 0) { | ||||
|     return maximum; | ||||
|   } | ||||
|  | ||||
|   // The loop counts length down to zero; the addiu after bgtz sits in the | ||||
|   // branch delay slot (noreorder) and advances vector on every iteration. | ||||
|   // vector and length are modified by the asm, so they are declared as | ||||
|   // read-write ("+r") operands rather than inputs. | ||||
|   __asm__ volatile ( | ||||
|     ".set push                                                        \n\t" | ||||
|     ".set noreorder                                                   \n\t" | ||||
|  | ||||
|    "1:                                                                \n\t" | ||||
|     "lh         %[value],         0(%[vector])                        \n\t" | ||||
|     "addiu      %[length],        %[length],          -1              \n\t" | ||||
|     "slt        %[tmp1],          %[maximum],         %[value]        \n\t" | ||||
|     "movn       %[maximum],       %[value],           %[tmp1]         \n\t" | ||||
|     "bgtz       %[length],        1b                                  \n\t" | ||||
|     " addiu     %[vector],        %[vector],          2               \n\t" | ||||
|     ".set pop                                                         \n\t" | ||||
|  | ||||
|     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value), | ||||
|       [vector] "+r" (vector), [length] "+r" (length) | ||||
|     : | ||||
|     : "memory" | ||||
|   ); | ||||
|  | ||||
|   return maximum; | ||||
| } | ||||
|  | ||||
| // Maximum value of word32 vector. Version for MIPS platform. | ||||
| // | ||||
| // Input: | ||||
| //      - vector : 32-bit input vector. | ||||
| //      - length : Number of samples in vector. | ||||
| // | ||||
| // Return value  : Largest value in vector; WEBRTC_SPL_WORD32_MIN if vector | ||||
| //                 is NULL or length <= 0. | ||||
| int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, int length) { | ||||
|   int32_t maximum = WEBRTC_SPL_WORD32_MIN; | ||||
|   int tmp1, value; | ||||
|  | ||||
|   if (vector == NULL || length <= 0) { | ||||
|     return maximum; | ||||
|   } | ||||
|  | ||||
|   // The loop counts length down to zero; the addiu after bgtz sits in the | ||||
|   // branch delay slot (noreorder) and advances vector on every iteration. | ||||
|   // vector and length are modified by the asm, so they are declared as | ||||
|   // read-write ("+r") operands rather than inputs. | ||||
|   __asm__ volatile ( | ||||
|     ".set push                                                        \n\t" | ||||
|     ".set noreorder                                                   \n\t" | ||||
|  | ||||
|    "1:                                                                \n\t" | ||||
|     "lw         %[value],         0(%[vector])                        \n\t" | ||||
|     "addiu      %[length],        %[length],          -1              \n\t" | ||||
|     "slt        %[tmp1],          %[maximum],         %[value]        \n\t" | ||||
|     "movn       %[maximum],       %[value],           %[tmp1]         \n\t" | ||||
|     "bgtz       %[length],        1b                                  \n\t" | ||||
|     " addiu     %[vector],        %[vector],          4               \n\t" | ||||
|  | ||||
|     ".set pop                                                         \n\t" | ||||
|  | ||||
|     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value), | ||||
|       [vector] "+r" (vector), [length] "+r" (length) | ||||
|     : | ||||
|     : "memory" | ||||
|   ); | ||||
|  | ||||
|   return maximum; | ||||
| } | ||||
|  | ||||
| // Minimum value of word16 vector. Version for MIPS platform. | ||||
| // | ||||
| // Input: | ||||
| //      - vector : 16-bit input vector. | ||||
| //      - length : Number of samples in vector. | ||||
| // | ||||
| // Return value  : Smallest value in vector; WEBRTC_SPL_WORD16_MAX if vector | ||||
| //                 is NULL or length <= 0. | ||||
| int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, int length) { | ||||
|   int16_t minimum = WEBRTC_SPL_WORD16_MAX; | ||||
|   int tmp1; | ||||
|   int16_t value; | ||||
|  | ||||
|   if (vector == NULL || length <= 0) { | ||||
|     return minimum; | ||||
|   } | ||||
|  | ||||
|   // The loop counts length down to zero; the addiu after bgtz sits in the | ||||
|   // branch delay slot (noreorder) and advances vector on every iteration. | ||||
|   // vector and length are modified by the asm, so they are declared as | ||||
|   // read-write ("+r") operands rather than inputs. | ||||
|   __asm__ volatile ( | ||||
|     ".set push                                                        \n\t" | ||||
|     ".set noreorder                                                   \n\t" | ||||
|  | ||||
|    "1:                                                                \n\t" | ||||
|     "lh         %[value],         0(%[vector])                        \n\t" | ||||
|     "addiu      %[length],        %[length],          -1              \n\t" | ||||
|     "slt        %[tmp1],          %[value],           %[minimum]      \n\t" | ||||
|     "movn       %[minimum],       %[value],           %[tmp1]         \n\t" | ||||
|     "bgtz       %[length],        1b                                  \n\t" | ||||
|     " addiu     %[vector],        %[vector],          2               \n\t" | ||||
|  | ||||
|     ".set pop                                                         \n\t" | ||||
|  | ||||
|     : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value), | ||||
|       [vector] "+r" (vector), [length] "+r" (length) | ||||
|     : | ||||
|     : "memory" | ||||
|   ); | ||||
|  | ||||
|   return minimum; | ||||
| } | ||||
|  | ||||
| // Minimum value of word32 vector. Version for MIPS platform. | ||||
| // | ||||
| // Input: | ||||
| //      - vector : 32-bit input vector. | ||||
| //      - length : Number of samples in vector. | ||||
| // | ||||
| // Return value  : Smallest value in vector; WEBRTC_SPL_WORD32_MAX if vector | ||||
| //                 is NULL or length <= 0. | ||||
| int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, int length) { | ||||
|   int32_t minimum = WEBRTC_SPL_WORD32_MAX; | ||||
|   int tmp1, value; | ||||
|  | ||||
|   if (vector == NULL || length <= 0) { | ||||
|     return minimum; | ||||
|   } | ||||
|  | ||||
|   // The loop counts length down to zero; the addiu after bgtz sits in the | ||||
|   // branch delay slot (noreorder) and advances vector on every iteration. | ||||
|   // vector and length are modified by the asm, so they are declared as | ||||
|   // read-write ("+r") operands rather than inputs. | ||||
|   __asm__ volatile ( | ||||
|     ".set push                                                        \n\t" | ||||
|     ".set noreorder                                                   \n\t" | ||||
|  | ||||
|    "1:                                                                \n\t" | ||||
|     "lw         %[value],         0(%[vector])                        \n\t" | ||||
|     "addiu      %[length],        %[length],          -1              \n\t" | ||||
|     "slt        %[tmp1],          %[value],           %[minimum]      \n\t" | ||||
|     "movn       %[minimum],       %[value],           %[tmp1]         \n\t" | ||||
|     "bgtz       %[length],        1b                                  \n\t" | ||||
|     " addiu     %[vector],        %[vector],          4               \n\t" | ||||
|  | ||||
|     ".set pop                                                         \n\t" | ||||
|  | ||||
|     : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value), | ||||
|       [vector] "+r" (vector), [length] "+r" (length) | ||||
|     : | ||||
|     : "memory" | ||||
|   ); | ||||
|  | ||||
|   return minimum; | ||||
| } | ||||
|  | ||||
| @@ -66,6 +66,7 @@ static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255}; | ||||
|  | ||||
|  | ||||
| // decimator | ||||
| #if !defined(MIPS32_LE) | ||||
| void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len, | ||||
|                              WebRtc_Word16* out, WebRtc_Word32* filtState) { | ||||
|   WebRtc_Word32 tmp1, tmp2, diff, in32, out32; | ||||
| @@ -121,6 +122,7 @@ void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len, | ||||
|   filtState[6] = state6; | ||||
|   filtState[7] = state7; | ||||
| } | ||||
| #endif  // #if !defined(MIPS32_LE) | ||||
|  | ||||
|  | ||||
| void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, | ||||
|   | ||||
							
								
								
									
										291
									
								
								webrtc/common_audio/signal_processing/resample_by_2_mips.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										291
									
								
								webrtc/common_audio/signal_processing/resample_by_2_mips.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,291 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * This file contains the resampling by two functions. | ||||
|  * The description header can be found in signal_processing_library.h | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| #if defined(MIPS32_LE) | ||||
|  | ||||
| #include "signal_processing_library.h" | ||||
|  | ||||
| // Allpass filter coefficients, three per table; the MIPS_DSP_R2_LE path of WebRtcSpl_DownsampleBy2 loads the same six values into locals. | ||||
| static const WebRtc_UWord16 kResampleAllpass1[3] = {3284, 24441, 49528}; | ||||
| static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255}; | ||||
|  | ||||
| // Multiply a 32-bit value with a 16-bit value and accumulate to another input. Both macros expand to WEBRTC_SPL_SCALEDIFF32 here. | ||||
| #define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) | ||||
| #define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) | ||||
|  | ||||
| // decimator | ||||
| void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, | ||||
|                              const WebRtc_Word16 len, | ||||
|                              WebRtc_Word16* out, | ||||
|                              WebRtc_Word32* filtState) { | ||||
|   WebRtc_Word32 out32; | ||||
|   WebRtc_Word16 i, len1; | ||||
|  | ||||
|   register WebRtc_Word32 state0 = filtState[0]; | ||||
|   register WebRtc_Word32 state1 = filtState[1]; | ||||
|   register WebRtc_Word32 state2 = filtState[2]; | ||||
|   register WebRtc_Word32 state3 = filtState[3]; | ||||
|   register WebRtc_Word32 state4 = filtState[4]; | ||||
|   register WebRtc_Word32 state5 = filtState[5]; | ||||
|   register WebRtc_Word32 state6 = filtState[6]; | ||||
|   register WebRtc_Word32 state7 = filtState[7]; | ||||
|  | ||||
| #if defined(MIPS_DSP_R2_LE) | ||||
|   WebRtc_Word32 k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2; | ||||
|  | ||||
|   k1Res0= 3284; | ||||
|   k1Res1= 24441; | ||||
|   k1Res2= 49528; | ||||
|   k2Res0= 12199; | ||||
|   k2Res1= 37471; | ||||
|   k2Res2= 60255; | ||||
|   len1 = (len >> 1); | ||||
|  | ||||
|   const WebRtc_Word32* inw = (WebRtc_Word32*)in; | ||||
|   WebRtc_Word32 tmp11, tmp12, tmp21, tmp22; | ||||
|   WebRtc_Word32 in322, in321; | ||||
|   WebRtc_Word32 diff1, diff2; | ||||
|   for (i = len1; i > 0; i--) { | ||||
|     __asm__ volatile ( | ||||
|       "lh         %[in321],    0(%[inw])                  \n\t" | ||||
|       "lh         %[in322],    2(%[inw])                  \n\t" | ||||
|  | ||||
|       "sll        %[in321],    %[in321],      10          \n\t" | ||||
|       "sll        %[in322],    %[in322],      10          \n\t" | ||||
|  | ||||
|       "addiu      %[inw],      %[inw],        4           \n\t" | ||||
|  | ||||
|       "subu       %[diff1],    %[in321],      %[state1]   \n\t" | ||||
|       "subu       %[diff2],    %[in322],      %[state5]   \n\t" | ||||
|  | ||||
|       : [in322] "=&r" (in322), [in321] "=&r" (in321), | ||||
|         [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw) | ||||
|       : [state1] "r" (state1), [state5] "r" (state5) | ||||
|       : "memory" | ||||
|     ); | ||||
|  | ||||
|     __asm__ volatile ( | ||||
|       "mult       $ac0,       %[diff1],       %[k2Res0]   \n\t" | ||||
|       "mult       $ac1,       %[diff2],       %[k1Res0]   \n\t" | ||||
|  | ||||
|       "extr.w     %[tmp11],   $ac0,           16          \n\t" | ||||
|       "extr.w     %[tmp12],   $ac1,           16          \n\t" | ||||
|  | ||||
|       "addu       %[tmp11],   %[state0],      %[tmp11]    \n\t" | ||||
|       "addu       %[tmp12],   %[state4],      %[tmp12]    \n\t" | ||||
|  | ||||
|       "addiu      %[state0],  %[in321],       0           \n\t" | ||||
|       "addiu      %[state4],  %[in322],       0           \n\t" | ||||
|  | ||||
|       "subu       %[diff1],   %[tmp11],       %[state2]   \n\t" | ||||
|       "subu       %[diff2],   %[tmp12],       %[state6]   \n\t" | ||||
|  | ||||
|       "mult       $ac0,       %[diff1],       %[k2Res1]   \n\t" | ||||
|       "mult       $ac1,       %[diff2],       %[k1Res1]   \n\t" | ||||
|  | ||||
|       "extr.w     %[tmp21],   $ac0,           16          \n\t" | ||||
|       "extr.w     %[tmp22],   $ac1,           16          \n\t" | ||||
|  | ||||
|       "addu       %[tmp21],   %[state1],      %[tmp21]    \n\t" | ||||
|       "addu       %[tmp22],   %[state5],      %[tmp22]    \n\t" | ||||
|  | ||||
|       "addiu      %[state1],  %[tmp11],       0           \n\t" | ||||
|       "addiu      %[state5],  %[tmp12],       0           \n\t" | ||||
|       : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21), | ||||
|         [tmp11] "=&r" (tmp11), [state0] "+r" (state0), | ||||
|         [state1] "+r" (state1), | ||||
|         [state2] "+r" (state2), | ||||
|         [state4] "+r" (state4), [tmp12] "=&r" (tmp12), | ||||
|         [state6] "+r" (state6), [state5] "+r" (state5) | ||||
|       : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0), | ||||
|         [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322), | ||||
|         [in321] "r" (in321), [k1Res0] "r" (k1Res0) | ||||
|       : "hi", "lo", "$ac1hi", "$ac1lo" | ||||
|     ); | ||||
|  | ||||
|     // upper allpass filter | ||||
|     __asm__ volatile ( | ||||
|       "subu       %[diff1],   %[tmp21],       %[state3]   \n\t" | ||||
|       "subu       %[diff2],   %[tmp22],       %[state7]   \n\t" | ||||
|  | ||||
|       "mult       $ac0,       %[diff1],       %[k2Res2]   \n\t" | ||||
|       "mult       $ac1,       %[diff2],       %[k1Res2]   \n\t" | ||||
|       "extr.w     %[state3],  $ac0,           16          \n\t" | ||||
|       "extr.w     %[state7],  $ac1,           16          \n\t" | ||||
|       "addu       %[state3],  %[state2],      %[state3]   \n\t" | ||||
|       "addu       %[state7],  %[state6],      %[state7]   \n\t" | ||||
|  | ||||
|       "addiu      %[state2],  %[tmp21],       0           \n\t" | ||||
|       "addiu      %[state6],  %[tmp22],       0           \n\t" | ||||
|  | ||||
|       // add two allpass outputs, divide by two and round | ||||
|       "addu       %[out32],   %[state3],      %[state7]   \n\t" | ||||
|       "addiu      %[out32],   %[out32],       1024        \n\t" | ||||
|       "sra        %[out32],   %[out32],       11          \n\t" | ||||
|       : [state3] "+r" (state3), [state6] "+r" (state6), | ||||
|         [state2] "+r" (state2), [diff2] "=&r" (diff2), | ||||
|         [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7) | ||||
|       : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21), | ||||
|         [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2) | ||||
|       : "hi", "lo", "$ac1hi", "$ac1lo" | ||||
|     ); | ||||
|  | ||||
|     // limit amplitude to prevent wrap-around, and write to output array | ||||
|     *out++ = WebRtcSpl_SatW32ToW16(out32); | ||||
|   } | ||||
| #else  // #if defined(MIPS_DSP_R2_LE) | ||||
|   WebRtc_Word32 tmp1, tmp2, diff; | ||||
|   WebRtc_Word32 in32; | ||||
|   len1 = (len >> 1)/4; | ||||
|   for (i = len1; i > 0; i--) { | ||||
|     // lower allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state1; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); | ||||
|     state0 = in32; | ||||
|     diff = tmp1 - state2; | ||||
|     tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); | ||||
|     state1 = tmp1; | ||||
|     diff = tmp2 - state3; | ||||
|     state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); | ||||
|     state2 = tmp2; | ||||
|  | ||||
|     // upper allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state5; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); | ||||
|     state4 = in32; | ||||
|     diff = tmp1 - state6; | ||||
|     tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); | ||||
|     state5 = tmp1; | ||||
|     diff = tmp2 - state7; | ||||
|     state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); | ||||
|     state6 = tmp2; | ||||
|  | ||||
|     // add two allpass outputs, divide by two and round | ||||
|     out32 = (state3 + state7 + 1024) >> 11; | ||||
|  | ||||
|     // limit amplitude to prevent wrap-around, and write to output array | ||||
|     *out++ = WebRtcSpl_SatW32ToW16(out32); | ||||
|     // lower allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state1; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); | ||||
|     state0 = in32; | ||||
|     diff = tmp1 - state2; | ||||
|     tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); | ||||
|     state1 = tmp1; | ||||
|     diff = tmp2 - state3; | ||||
|     state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); | ||||
|     state2 = tmp2; | ||||
|  | ||||
|     // upper allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state5; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); | ||||
|     state4 = in32; | ||||
|     diff = tmp1 - state6; | ||||
|     tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); | ||||
|     state5 = tmp1; | ||||
|     diff = tmp2 - state7; | ||||
|     state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); | ||||
|     state6 = tmp2; | ||||
|  | ||||
|     // add two allpass outputs, divide by two and round | ||||
|     out32 = (state3 + state7 + 1024) >> 11; | ||||
|  | ||||
|     // limit amplitude to prevent wrap-around, and write to output array | ||||
|     *out++ = WebRtcSpl_SatW32ToW16(out32); | ||||
|     // lower allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state1; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); | ||||
|     state0 = in32; | ||||
|     diff = tmp1 - state2; | ||||
|     tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); | ||||
|     state1 = tmp1; | ||||
|     diff = tmp2 - state3; | ||||
|     state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); | ||||
|     state2 = tmp2; | ||||
|  | ||||
|     // upper allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state5; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); | ||||
|     state4 = in32; | ||||
|     diff = tmp1 - state6; | ||||
|     tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); | ||||
|     state5 = tmp1; | ||||
|     diff = tmp2 - state7; | ||||
|     state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); | ||||
|     state6 = tmp2; | ||||
|  | ||||
|     // add two allpass outputs, divide by two and round | ||||
|     out32 = (state3 + state7 + 1024) >> 11; | ||||
|  | ||||
|     // limit amplitude to prevent wrap-around, and write to output array | ||||
|     *out++ = WebRtcSpl_SatW32ToW16(out32); | ||||
|     // lower allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state1; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); | ||||
|     state0 = in32; | ||||
|     diff = tmp1 - state2; | ||||
|     tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); | ||||
|     state1 = tmp1; | ||||
|     diff = tmp2 - state3; | ||||
|     state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); | ||||
|     state2 = tmp2; | ||||
|  | ||||
|     // upper allpass filter | ||||
|     in32 = (WebRtc_Word32)(*in++) << 10; | ||||
|     diff = in32 - state5; | ||||
|     tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); | ||||
|     state4 = in32; | ||||
|     diff = tmp1 - state6; | ||||
|     tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); | ||||
|     state5 = tmp1; | ||||
|     diff = tmp2 - state7; | ||||
|     state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); | ||||
|     state6 = tmp2; | ||||
|  | ||||
|     // add two allpass outputs, divide by two and round | ||||
|     out32 = (state3 + state7 + 1024) >> 11; | ||||
|  | ||||
|     // limit amplitude to prevent wrap-around, and write to output array | ||||
|     *out++ = WebRtcSpl_SatW32ToW16(out32); | ||||
|   } | ||||
| #endif  // #if defined(MIPS_DSP_R2_LE) | ||||
|   __asm__ volatile ( | ||||
|     "sw       %[state0],      0(%[filtState])     \n\t" | ||||
|     "sw       %[state1],      4(%[filtState])     \n\t" | ||||
|     "sw       %[state2],      8(%[filtState])     \n\t" | ||||
|     "sw       %[state3],      12(%[filtState])    \n\t" | ||||
|     "sw       %[state4],      16(%[filtState])    \n\t" | ||||
|     "sw       %[state5],      20(%[filtState])    \n\t" | ||||
|     "sw       %[state6],      24(%[filtState])    \n\t" | ||||
|     "sw       %[state7],      28(%[filtState])    \n\t" | ||||
|     : | ||||
|     : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2), | ||||
|       [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5), | ||||
|       [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState) | ||||
|     : "memory" | ||||
|   ); | ||||
| } | ||||
|  | ||||
| #endif  // #if defined(MIPS32_LE) | ||||
|  | ||||
| @@ -84,6 +84,12 @@ | ||||
|             }], | ||||
|           ], | ||||
|         }], | ||||
|         ['target_arch=="mipsel"', { | ||||
|           'sources': [ | ||||
|             'min_max_operations_mips.c', | ||||
|             'resample_by_2_mips.c', | ||||
|           ], | ||||
|         }], | ||||
|       ], | ||||
|       # Ignore warning on shift operator promotion. | ||||
|       'msvs_disabled_warnings': [ 4334, ], | ||||
|   | ||||
| @@ -31,7 +31,8 @@ ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; | ||||
| RealForwardFFT WebRtcSpl_RealForwardFFT; | ||||
| RealInverseFFT WebRtcSpl_RealInverseFFT; | ||||
|  | ||||
| #if defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON) | ||||
| #if (defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON)) && \ | ||||
|      !defined(MIPS32_LE) | ||||
| /* Initialize function pointers to the generic C version. */ | ||||
| static void InitPointersToC() { | ||||
|   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; | ||||
| @@ -67,6 +68,28 @@ static void InitPointersToNeon() { | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #if defined(MIPS32_LE) | ||||
| /* Initialize function pointers to the MIPS version. */ | ||||
| static void InitPointersToMIPS() { | ||||
|   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; | ||||
|   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; | ||||
|   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; | ||||
|   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; | ||||
|   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; | ||||
|   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; | ||||
|   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; | ||||
|   WebRtcSpl_ScaleAndAddVectorsWithRound = | ||||
|       WebRtcSpl_ScaleAndAddVectorsWithRoundC; | ||||
|   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC; | ||||
|   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC; | ||||
| #if defined(MIPS_DSP_R1_LE) | ||||
|   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; | ||||
| #else | ||||
|   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; | ||||
| #endif | ||||
| } | ||||
| #endif | ||||
|  | ||||
| static void InitFunctionPointers(void) { | ||||
| #if defined(WEBRTC_DETECT_ARM_NEON) | ||||
|   if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { | ||||
| @@ -76,6 +99,8 @@ static void InitFunctionPointers(void) { | ||||
|   } | ||||
| #elif defined(WEBRTC_ARCH_ARM_NEON) | ||||
|   InitPointersToNeon(); | ||||
| #elif defined(MIPS32_LE) | ||||
|   InitPointersToMIPS(); | ||||
| #else | ||||
|   InitPointersToC(); | ||||
| #endif  /* WEBRTC_DETECT_ARM_NEON */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 andrew@webrtc.org
					andrew@webrtc.org