avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for VP9 lpf functions
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
		
				
					committed by
					
						
						Michael Niedermayer
					
				
			
			
				
	
			
			
			
						parent
						
							52c75d486e
						
					
				
				
					commit
					fd7eadd25c
				
			@@ -43,7 +43,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_msa.o            \
 | 
				
			|||||||
                                             mips/hevc_idct_msa.o          \
 | 
					                                             mips/hevc_idct_msa.o          \
 | 
				
			||||||
                                             mips/hevc_lpf_sao_msa.o       \
 | 
					                                             mips/hevc_lpf_sao_msa.o       \
 | 
				
			||||||
                                             mips/hevcpred_msa.o
 | 
					                                             mips/hevcpred_msa.o
 | 
				
			||||||
MSA-OBJS-$(CONFIG_VP9_DECODER)            += mips/vp9_mc_msa.o
 | 
					MSA-OBJS-$(CONFIG_VP9_DECODER)            += mips/vp9_mc_msa.o             \
 | 
				
			||||||
 | 
					                                             mips/vp9_lpf_msa.o
 | 
				
			||||||
MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o            \
 | 
					MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o            \
 | 
				
			||||||
                                             mips/h264idct_msa.o
 | 
					                                             mips/h264idct_msa.o
 | 
				
			||||||
MSA-OBJS-$(CONFIG_H264QPEL)               += mips/h264qpel_msa.o
 | 
					MSA-OBJS-$(CONFIG_H264QPEL)               += mips/h264qpel_msa.o
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										2599
									
								
								libavcodec/mips/vp9_lpf_msa.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2599
									
								
								libavcodec/mips/vp9_lpf_msa.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -80,9 +80,34 @@ static av_cold void vp9dsp_mc_init_msa(VP9DSPContext *dsp, int bpp)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static av_cold void vp9dsp_loopfilter_init_msa(VP9DSPContext *dsp, int bpp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (bpp == 8) {
 | 
				
			||||||
 | 
					        dsp->loop_filter_8[0][0] = ff_loop_filter_h_4_8_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_8[0][1] = ff_loop_filter_v_4_8_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_8[1][0] = ff_loop_filter_h_8_8_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_8[1][1] = ff_loop_filter_v_8_8_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_8[2][0] = ff_loop_filter_h_16_8_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_8[2][1] = ff_loop_filter_v_16_8_msa;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        dsp->loop_filter_16[0] = ff_loop_filter_h_16_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_16[1] = ff_loop_filter_v_16_16_msa;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[0][0][0] = ff_loop_filter_h_44_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[0][0][1] = ff_loop_filter_v_44_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[0][1][0] = ff_loop_filter_h_48_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[0][1][1] = ff_loop_filter_v_48_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[1][0][0] = ff_loop_filter_h_84_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[1][0][1] = ff_loop_filter_v_84_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[1][1][0] = ff_loop_filter_h_88_16_msa;
 | 
				
			||||||
 | 
					        dsp->loop_filter_mix2[1][1][1] = ff_loop_filter_v_88_16_msa;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static av_cold void vp9dsp_init_msa(VP9DSPContext *dsp, int bpp)
 | 
					static av_cold void vp9dsp_init_msa(VP9DSPContext *dsp, int bpp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    vp9dsp_mc_init_msa(dsp, bpp);
 | 
					    vp9dsp_mc_init_msa(dsp, bpp);
 | 
				
			||||||
 | 
					    vp9dsp_loopfilter_init_msa(dsp, bpp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif  // #if HAVE_MSA
 | 
					#endif  // #if HAVE_MSA
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -88,4 +88,37 @@ VP9_COPY_AVG_MIPS_MSA_FUNC(4);
 | 
				
			|||||||
#undef VP9_8TAP_MIPS_MSA_FUNC
 | 
					#undef VP9_8TAP_MIPS_MSA_FUNC
 | 
				
			||||||
#undef VP9_COPY_AVG_MIPS_MSA_FUNC
 | 
					#undef VP9_COPY_AVG_MIPS_MSA_FUNC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ff_loop_filter_h_4_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                              int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_8_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                              int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_16_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                               int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_4_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                              int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_8_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                              int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_16_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                               int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_44_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_88_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_16_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_44_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_88_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_16_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_48_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_h_84_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_48_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					void ff_loop_filter_v_84_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
 | 
				
			||||||
 | 
					                                int32_t i, int32_t h);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif  // #ifndef AVCODEC_MIPS_VP9DSP_MIPS_H
 | 
					#endif  // #ifndef AVCODEC_MIPS_VP9DSP_MIPS_H
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -24,6 +24,9 @@
 | 
				
			|||||||
#include <stdint.h>
 | 
					#include <stdint.h>
 | 
				
			||||||
#include <msa.h>
 | 
					#include <msa.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define ALIGNMENT           16
 | 
				
			||||||
 | 
					#define ALLOC_ALIGNED(align) __attribute__ ((aligned((align) << 1)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define LD_B(RTYPE, psrc) *((RTYPE *)(psrc))
 | 
					#define LD_B(RTYPE, psrc) *((RTYPE *)(psrc))
 | 
				
			||||||
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
 | 
					#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
 | 
				
			||||||
#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
 | 
					#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user