Migrate loop filter functions from vp9/ to vpx_dsp/
The various tap loop filter operations are functions common across codecs. This commit moves them, along with their SIMD optimizations, to the vpx_dsp folder. Change-Id: Ia5fa0b2e5289cdb98467502a549c380b9c60e92c
This commit is contained in:
		@@ -19,7 +19,7 @@
 | 
				
			|||||||
#include "test/util.h"
 | 
					#include "test/util.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vpx_config.h"
 | 
					#include "./vpx_config.h"
 | 
				
			||||||
#include "./vp9_rtcd.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "vp9/common/vp9_entropy.h"
 | 
					#include "vp9/common/vp9_entropy.h"
 | 
				
			||||||
#include "vp9/common/vp9_loopfilter.h"
 | 
					#include "vp9/common/vp9_loopfilter.h"
 | 
				
			||||||
#include "vpx/vpx_integer.h"
 | 
					#include "vpx/vpx_integer.h"
 | 
				
			||||||
@@ -665,11 +665,11 @@ INSTANTIATE_TEST_CASE_P(
 | 
				
			|||||||
                   &wrapper_vertical_16_c, 8, 1),
 | 
					                   &wrapper_vertical_16_c, 8, 1),
 | 
				
			||||||
        make_tuple(&wrapper_vertical_16_dual_neon,
 | 
					        make_tuple(&wrapper_vertical_16_dual_neon,
 | 
				
			||||||
                   &wrapper_vertical_16_dual_c, 8, 1),
 | 
					                   &wrapper_vertical_16_dual_c, 8, 1),
 | 
				
			||||||
 | 
					#endif  // HAVE_NEON_ASM
 | 
				
			||||||
        make_tuple(&vp9_lpf_horizontal_8_neon,
 | 
					        make_tuple(&vp9_lpf_horizontal_8_neon,
 | 
				
			||||||
                   &vp9_lpf_horizontal_8_c, 8, 1),
 | 
					                   &vp9_lpf_horizontal_8_c, 8, 1),
 | 
				
			||||||
        make_tuple(&vp9_lpf_vertical_8_neon,
 | 
					        make_tuple(&vp9_lpf_vertical_8_neon,
 | 
				
			||||||
                   &vp9_lpf_vertical_8_c, 8, 1),
 | 
					                   &vp9_lpf_vertical_8_c, 8, 1),
 | 
				
			||||||
#endif  // HAVE_NEON_ASM
 | 
					 | 
				
			||||||
        make_tuple(&vp9_lpf_horizontal_4_neon,
 | 
					        make_tuple(&vp9_lpf_horizontal_4_neon,
 | 
				
			||||||
                   &vp9_lpf_horizontal_4_c, 8, 1),
 | 
					                   &vp9_lpf_horizontal_4_c, 8, 1),
 | 
				
			||||||
        make_tuple(&vp9_lpf_vertical_4_neon,
 | 
					        make_tuple(&vp9_lpf_vertical_4_neon,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,6 +9,7 @@
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vpx_config.h"
 | 
					#include "./vpx_config.h"
 | 
				
			||||||
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "vp9/common/vp9_loopfilter.h"
 | 
					#include "vp9/common/vp9_loopfilter.h"
 | 
				
			||||||
#include "vp9/common/vp9_onyxc_int.h"
 | 
					#include "vp9/common/vp9_onyxc_int.h"
 | 
				
			||||||
#include "vp9/common/vp9_reconinter.h"
 | 
					#include "vp9/common/vp9_reconinter.h"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -219,49 +219,6 @@ specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
 | 
				
			|||||||
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
					add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 | 
				
			||||||
specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
 | 
					specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# Loopfilter
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_vertical_4 mmx neon dspr2 msa/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2 msa/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
 | 
					 | 
				
			||||||
$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2 msa/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
					 | 
				
			||||||
specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# post proc
 | 
					# post proc
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
@@ -667,42 +624,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 | 
				
			|||||||
  add_proto qw/void vp9_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
 | 
					  add_proto qw/void vp9_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
 | 
				
			||||||
  specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64";
 | 
					  specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  #
 | 
					 | 
				
			||||||
  # Loopfilter
 | 
					 | 
				
			||||||
  #
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_vertical_16 sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_vertical_8 sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_vertical_4 sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
					 | 
				
			||||||
  specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  #
 | 
					  #
 | 
				
			||||||
  # post proc
 | 
					  # post proc
 | 
				
			||||||
  #
 | 
					  #
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -54,7 +54,6 @@ VP9_COMMON_SRCS-yes += common/vp9_textblit.h
 | 
				
			|||||||
VP9_COMMON_SRCS-yes += common/vp9_tile_common.h
 | 
					VP9_COMMON_SRCS-yes += common/vp9_tile_common.h
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
 | 
					VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
 | 
					VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/vp9_loopfilter_filters.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/vp9_thread_common.c
 | 
					VP9_COMMON_SRCS-yes += common/vp9_thread_common.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c
 | 
					VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h
 | 
					VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h
 | 
				
			||||||
@@ -69,14 +68,11 @@ VP9_COMMON_SRCS-yes += common/vp9_scan.h
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h
 | 
					VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h
 | 
				
			||||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
 | 
					VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
 | 
				
			||||||
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
 | 
					VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
 | 
				
			||||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
 | 
					VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
 | 
				
			||||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
 | 
					VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
 | 
				
			||||||
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
 | 
					VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
 | 
					VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
 | 
					VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
 | 
					VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
 | 
					VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
 | 
				
			||||||
@@ -95,7 +91,6 @@ VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
 | 
				
			|||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 | 
					ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_loopfilter_intrin_sse2.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
 | 
					VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
 | 
					VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
 | 
				
			||||||
ifeq ($(CONFIG_USE_X86INC),yes)
 | 
					ifeq ($(CONFIG_USE_X86INC),yes)
 | 
				
			||||||
@@ -147,10 +142,6 @@ VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
 | 
				
			|||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
 | 
					VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
 | 
					VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c
 | 
					VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_msa.h
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(CONFIG_VP9_POSTPROC),yes)
 | 
					ifeq ($(CONFIG_VP9_POSTPROC),yes)
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
 | 
					VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
 | 
				
			||||||
@@ -165,16 +156,12 @@ VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3_x86_64.asm
 | 
				
			|||||||
endif
 | 
					endif
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM)
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_8_neon_asm$(ASM)
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
 | 
					VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 | 
					ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
 | 
					VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
 | 
					VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon.c
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# neon with assembly and intrinsics implementations. If both are available
 | 
					# neon with assembly and intrinsics implementations. If both are available
 | 
				
			||||||
# prefer assembly.
 | 
					# prefer assembly.
 | 
				
			||||||
@@ -193,7 +180,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon_asm$(ASM)
 | 
				
			|||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM)
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM)
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM)
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM)
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM)
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM)
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon_asm$(ASM)
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon_asm$(ASM)
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon_asm$(ASM)
 | 
				
			||||||
else
 | 
					else
 | 
				
			||||||
ifeq ($(HAVE_NEON), yes)
 | 
					ifeq ($(HAVE_NEON), yes)
 | 
				
			||||||
@@ -211,11 +197,6 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_1_add_neon.c
 | 
				
			|||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon.c
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon.c
 | 
					VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon.c
 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_16_neon.c
 | 
					 | 
				
			||||||
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon.c
 | 
					 | 
				
			||||||
# TODO(johannkoenig): re-enable when chromium build is fixed
 | 
					 | 
				
			||||||
# # https://code.google.com/p/chromium/issues/detail?id=443839
 | 
					 | 
				
			||||||
#VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_8_neon.c
 | 
					 | 
				
			||||||
endif  # HAVE_NEON
 | 
					endif  # HAVE_NEON
 | 
				
			||||||
endif  # HAVE_NEON_ASM
 | 
					endif  # HAVE_NEON_ASM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,7 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <arm_neon.h>
 | 
					#include <arm_neon.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vp9_rtcd.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "./vpx_config.h"
 | 
					#include "./vpx_config.h"
 | 
				
			||||||
#include "vpx/vpx_integer.h"
 | 
					#include "vpx/vpx_integer.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -10,7 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <arm_neon.h>
 | 
					#include <arm_neon.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vpx_config.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static INLINE void vp9_loop_filter_neon(
 | 
					static INLINE void vp9_loop_filter_neon(
 | 
				
			||||||
        uint8x8_t dblimit,    // flimit
 | 
					        uint8x8_t dblimit,    // flimit
 | 
				
			||||||
@@ -111,11 +111,11 @@ static INLINE void vp9_loop_filter_neon(
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void vp9_lpf_horizontal_4_neon(
 | 
					void vp9_lpf_horizontal_4_neon(
 | 
				
			||||||
        unsigned char *src,
 | 
					        uint8_t *src,
 | 
				
			||||||
        int pitch,
 | 
					        int pitch,
 | 
				
			||||||
        unsigned char *blimit,
 | 
					        const uint8_t *blimit,
 | 
				
			||||||
        unsigned char *limit,
 | 
					        const uint8_t *limit,
 | 
				
			||||||
        unsigned char *thresh,
 | 
					        const uint8_t *thresh,
 | 
				
			||||||
        int count) {
 | 
					        int count) {
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    uint8_t *s, *psrc;
 | 
					    uint8_t *s, *psrc;
 | 
				
			||||||
@@ -166,11 +166,11 @@ void vp9_lpf_horizontal_4_neon(
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void vp9_lpf_vertical_4_neon(
 | 
					void vp9_lpf_vertical_4_neon(
 | 
				
			||||||
        unsigned char *src,
 | 
					        uint8_t *src,
 | 
				
			||||||
        int pitch,
 | 
					        int pitch,
 | 
				
			||||||
        unsigned char *blimit,
 | 
					        const uint8_t *blimit,
 | 
				
			||||||
        unsigned char *limit,
 | 
					        const uint8_t *limit,
 | 
				
			||||||
        unsigned char *thresh,
 | 
					        const uint8_t *thresh,
 | 
				
			||||||
        int count) {
 | 
					        int count) {
 | 
				
			||||||
    int i, pitch8;
 | 
					    int i, pitch8;
 | 
				
			||||||
    uint8_t *s;
 | 
					    uint8_t *s;
 | 
				
			||||||
@@ -10,7 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <arm_neon.h>
 | 
					#include <arm_neon.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vpx_config.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static INLINE void vp9_mbloop_filter_neon(
 | 
					static INLINE void vp9_mbloop_filter_neon(
 | 
				
			||||||
        uint8x8_t dblimit,   // mblimit
 | 
					        uint8x8_t dblimit,   // mblimit
 | 
				
			||||||
@@ -264,11 +264,11 @@ static INLINE void vp9_mbloop_filter_neon(
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void vp9_lpf_horizontal_8_neon(
 | 
					void vp9_lpf_horizontal_8_neon(
 | 
				
			||||||
        unsigned char *src,
 | 
					        uint8_t *src,
 | 
				
			||||||
        int pitch,
 | 
					        int pitch,
 | 
				
			||||||
        unsigned char *blimit,
 | 
					        const uint8_t *blimit,
 | 
				
			||||||
        unsigned char *limit,
 | 
					        const uint8_t *limit,
 | 
				
			||||||
        unsigned char *thresh,
 | 
					        const uint8_t *thresh,
 | 
				
			||||||
        int count) {
 | 
					        int count) {
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    uint8_t *s, *psrc;
 | 
					    uint8_t *s, *psrc;
 | 
				
			||||||
@@ -324,11 +324,11 @@ void vp9_lpf_horizontal_8_neon(
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void vp9_lpf_vertical_8_neon(
 | 
					void vp9_lpf_vertical_8_neon(
 | 
				
			||||||
        unsigned char *src,
 | 
					        uint8_t *src,
 | 
				
			||||||
        int pitch,
 | 
					        int pitch,
 | 
				
			||||||
        unsigned char *blimit,
 | 
					        const uint8_t *blimit,
 | 
				
			||||||
        unsigned char *limit,
 | 
					        const uint8_t *limit,
 | 
				
			||||||
        unsigned char *thresh,
 | 
					        const uint8_t *thresh,
 | 
				
			||||||
        int count) {
 | 
					        int count) {
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    uint8_t *s;
 | 
					    uint8_t *s;
 | 
				
			||||||
@@ -10,7 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <arm_neon.h>
 | 
					#include <arm_neon.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vp9_rtcd.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "./vpx_config.h"
 | 
					#include "./vpx_config.h"
 | 
				
			||||||
#include "vpx/vpx_integer.h"
 | 
					#include "vpx/vpx_integer.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 | 
					 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *  Use of this source code is governed by a BSD-style license
 | 
					 *  Use of this source code is governed by a BSD-style license
 | 
				
			||||||
 *  that can be found in the LICENSE file in the root of the source
 | 
					 *  that can be found in the LICENSE file in the root of the source
 | 
				
			||||||
@@ -9,7 +9,7 @@
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "vpx_ports/mem.h"
 | 
					#include "vpx_ports/mem.h"
 | 
				
			||||||
#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
 | 
					#include "vpx_dsp/mips/loopfilter_msa.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
 | 
					int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
 | 
				
			||||||
                                 uint8_t *filter48,
 | 
					                                 uint8_t *filter48,
 | 
				
			||||||
@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *  be found in the AUTHORS file in the root of the source tree.
 | 
					 *  be found in the AUTHORS file in the root of the source tree.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
 | 
					#include "vpx_dsp/mips/loopfilter_msa.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
 | 
					void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
 | 
				
			||||||
                              const uint8_t *b_limit_ptr,
 | 
					                              const uint8_t *b_limit_ptr,
 | 
				
			||||||
@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *  be found in the AUTHORS file in the root of the source tree.
 | 
					 *  be found in the AUTHORS file in the root of the source tree.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
 | 
					#include "vpx_dsp/mips/loopfilter_msa.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
 | 
					void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
 | 
				
			||||||
                              const uint8_t *b_limit_ptr,
 | 
					                              const uint8_t *b_limit_ptr,
 | 
				
			||||||
@@ -8,8 +8,8 @@
 | 
				
			|||||||
 *  be found in the AUTHORS file in the root of the source tree.
 | 
					 *  be found in the AUTHORS file in the root of the source tree.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_
 | 
					#ifndef VPX_DSP_LOOPFILTER_MSA_H_
 | 
				
			||||||
#define VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_
 | 
					#define VPX_DSP_LOOPFILTER_MSA_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "vpx_dsp/mips/macros_msa.h"
 | 
					#include "vpx_dsp/mips/macros_msa.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -243,4 +243,4 @@
 | 
				
			|||||||
  mask_out = limit_in < (v16u8)mask_out;                         \
 | 
					  mask_out = limit_in < (v16u8)mask_out;                         \
 | 
				
			||||||
  mask_out = __msa_xori_b(mask_out, 0xff);                       \
 | 
					  mask_out = __msa_xori_b(mask_out, 0xff);                       \
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif  /* VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ */
 | 
					#endif  /* VPX_DSP_LOOPFILTER_MSA_H_ */
 | 
				
			||||||
@@ -13,6 +13,36 @@ DSP_SRCS-yes += vpx_dsp_common.h
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
DSP_SRCS-$(HAVE_MSA)    += mips/macros_msa.h
 | 
					DSP_SRCS-$(HAVE_MSA)    += mips/macros_msa.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# loop filters
 | 
				
			||||||
 | 
					DSP_SRCS-yes += loopfilter.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64)   += x86/loopfilter_sse2.c
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_AVX2)                += x86/loopfilter_avx2.c
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_MMX)                 += x86/loopfilter_mmx.asm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_NEON)   += arm/loopfilter_neon.c
 | 
				
			||||||
 | 
					ifeq ($(HAVE_NEON_ASM),yes)
 | 
				
			||||||
 | 
					DSP_SRCS-yes  += arm/loopfilter_mb_neon$(ASM)
 | 
				
			||||||
 | 
					DSP_SRCS-yes  += arm/loopfilter_16_neon$(ASM)
 | 
				
			||||||
 | 
					DSP_SRCS-yes  += arm/loopfilter_8_neon$(ASM)
 | 
				
			||||||
 | 
					DSP_SRCS-yes  += arm/loopfilter_4_neon$(ASM)
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					ifeq ($(HAVE_NEON),yes)
 | 
				
			||||||
 | 
					DSP_SRCS-yes   += arm/loopfilter_16_neon.c
 | 
				
			||||||
 | 
					DSP_SRCS-yes   += arm/loopfilter_8_neon.c
 | 
				
			||||||
 | 
					DSP_SRCS-yes   += arm/loopfilter_4_neon.c
 | 
				
			||||||
 | 
					endif  # HAVE_NEON
 | 
				
			||||||
 | 
					endif  # HAVE_NEON_ASM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_MSA)    += mips/loopfilter_msa.h
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_MSA)    += mips/loopfilter_16_msa.c
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_MSA)    += mips/loopfilter_8_msa.c
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_MSA)    += mips/loopfilter_4_msa.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 | 
				
			||||||
 | 
					DSP_SRCS-$(HAVE_SSE2)   += x86/highbd_loopfilter_sse2.c
 | 
				
			||||||
 | 
					endif  # CONFIG_VP9_HIGHBITDEPTH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifeq ($(CONFIG_ENCODERS),yes)
 | 
					ifeq ($(CONFIG_ENCODERS),yes)
 | 
				
			||||||
DSP_SRCS-yes            += sad.c
 | 
					DSP_SRCS-yes            += sad.c
 | 
				
			||||||
DSP_SRCS-yes            += subtract.c
 | 
					DSP_SRCS-yes            += subtract.c
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,6 +35,82 @@ if ($opts{arch} eq "x86_64") {
 | 
				
			|||||||
  $avx_x86_64 = $avx2_x86_64 = '';
 | 
					  $avx_x86_64 = $avx2_x86_64 = '';
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# Loopfilter
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_vertical_16 sse2 neon_asm msa/;
 | 
				
			||||||
 | 
					$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm msa/;
 | 
				
			||||||
 | 
					$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_vertical_8 sse2 neon msa/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm msa/;
 | 
				
			||||||
 | 
					$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_vertical_4 mmx neon msa/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_vertical_4_dual sse2 neon msa/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
 | 
				
			||||||
 | 
					$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_horizontal_8 sse2 neon msa/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm msa/;
 | 
				
			||||||
 | 
					$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_horizontal_4 mmx neon msa/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 | 
				
			||||||
 | 
					specialize qw/vp9_lpf_horizontal_4_dual sse2 neon msa/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_vertical_16 sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_vertical_8 sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_vertical_4 sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 | 
				
			||||||
 | 
					  specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;
 | 
				
			||||||
 | 
					}  # CONFIG_VP9_HIGHBITDEPTH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
 | 
					if (vpx_config("CONFIG_ENCODERS") eq "yes") {
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# Block subtraction
 | 
					# Block subtraction
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,7 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <emmintrin.h>  // SSE2
 | 
					#include <emmintrin.h>  // SSE2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vp9_rtcd.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "vpx_ports/mem.h"
 | 
					#include "vpx_ports/mem.h"
 | 
				
			||||||
#include "vp9/common/vp9_loopfilter.h"
 | 
					#include "vp9/common/vp9_loopfilter.h"
 | 
				
			||||||
#include "vpx_ports/emmintrin_compat.h"
 | 
					#include "vpx_ports/emmintrin_compat.h"
 | 
				
			||||||
@@ -10,7 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <immintrin.h>  /* AVX2 */
 | 
					#include <immintrin.h>  /* AVX2 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vp9_rtcd.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "vpx_ports/mem.h"
 | 
					#include "vpx_ports/mem.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
 | 
					static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
 | 
				
			||||||
@@ -10,8 +10,8 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <emmintrin.h>  // SSE2
 | 
					#include <emmintrin.h>  // SSE2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "./vp9_rtcd.h"
 | 
					#include "./vpx_dsp_rtcd.h"
 | 
				
			||||||
#include "vp9/common/vp9_loopfilter.h"
 | 
					#include "vpx_ports/mem.h"
 | 
				
			||||||
#include "vpx_ports/emmintrin_compat.h"
 | 
					#include "vpx_ports/emmintrin_compat.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static INLINE __m128i abs_diff(__m128i a, __m128i b) {
 | 
					static INLINE __m128i abs_diff(__m128i a, __m128i b) {
 | 
				
			||||||
		Reference in New Issue
	
	Block a user