From 318fc0c34f142b9391b957cb49c90ea8d2373267 Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Tue, 2 Sep 2014 14:41:36 -0700 Subject: [PATCH] Removing MMX SAD calculation code. Removed functions: * vp9_sad_16x16_mmx * vp9_sad_8x16_mmx * vp9_sad_16x8_mmx * vp9_sad_8x8_mmx * vp9_sad_4x4_mmx Change-Id: Ic5174b93b64d65d846f0c11e72cab149e9472bc3 --- test/sad_test.cc | 15 -- vp9/common/vp9_rtcd_defs.pl | 10 +- vp9/encoder/x86/vp9_sad_mmx.asm | 427 -------------------------------- vp9/vp9cx.mk | 1 - 4 files changed, 5 insertions(+), 448 deletions(-) delete mode 100644 vp9/encoder/x86/vp9_sad_mmx.asm diff --git a/test/sad_test.cc b/test/sad_test.cc index e63770bd4..5377c1e73 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -505,21 +505,6 @@ const SadMxNParam mmx_tests[] = { INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests)); #endif // CONFIG_VP8_ENCODER -#if CONFIG_VP9_ENCODER -const SadMxNVp9Func sad_16x16_mmx_vp9 = vp9_sad16x16_mmx; -const SadMxNVp9Func sad_8x16_mmx_vp9 = vp9_sad8x16_mmx; -const SadMxNVp9Func sad_16x8_mmx_vp9 = vp9_sad16x8_mmx; -const SadMxNVp9Func sad_8x8_mmx_vp9 = vp9_sad8x8_mmx; -const SadMxNVp9Func sad_4x4_mmx_vp9 = vp9_sad4x4_mmx; -const SadMxNVp9Param mmx_vp9_tests[] = { - make_tuple(16, 16, sad_16x16_mmx_vp9), - make_tuple(8, 16, sad_8x16_mmx_vp9), - make_tuple(16, 8, sad_16x8_mmx_vp9), - make_tuple(8, 8, sad_8x8_mmx_vp9), - make_tuple(4, 4, sad_4x4_mmx_vp9), -}; -INSTANTIATE_TEST_CASE_P(MMX, SADVP9Test, ::testing::ValuesIn(mmx_vp9_tests)); -#endif // CONFIG_VP9_ENCODER #endif // HAVE_MMX #if HAVE_SSE diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 5dadee906..19a527539 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -545,16 +545,16 @@ add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_str specialize qw/vp9_sad32x32 neon/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad16x16 mmx neon/, "$sse2_x86inc"; +specialize qw/vp9_sad16x16 neon/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; +specialize qw/vp9_sad16x8/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; +specialize qw/vp9_sad8x16/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad8x8 mmx neon/, "$sse2_x86inc"; +specialize qw/vp9_sad8x8 neon/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vp9_sad8x4/, "$sse2_x86inc"; @@ -563,7 +563,7 @@ add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_strid specialize qw/vp9_sad4x8/, "$sse_x86inc"; add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; +specialize qw/vp9_sad4x4/, "$sse_x86inc"; add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; diff --git a/vp9/encoder/x86/vp9_sad_mmx.asm b/vp9/encoder/x86/vp9_sad_mmx.asm deleted file mode 100644 index 32fdd23d7..000000000 --- a/vp9/encoder/x86/vp9_sad_mmx.asm +++ /dev/null @@ -1,427 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -global sym(vp9_sad16x16_mmx) PRIVATE -global sym(vp9_sad8x16_mmx) PRIVATE -global sym(vp9_sad8x8_mmx) PRIVATE -global sym(vp9_sad4x4_mmx) PRIVATE -global sym(vp9_sad16x8_mmx) PRIVATE - -;unsigned int vp9_sad16x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad16x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] - - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpcklbw mm2, mm6 - - punpckhbw mm1, mm6 - punpckhbw mm3, mm6 - - paddw mm0, mm2 - paddw mm1, mm3 - - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - paddw mm7, mm1 - - cmp rsi, rcx - jne .x16x16sad_mmx_loop - - - movq mm0, mm7 - - punpcklwd mm0, mm6 - punpckhwd mm7, mm6 - - paddw mm0, mm7 - movq mm7, mm0 - - - psrlq mm0, 32 - paddw mm7, mm0 - - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad8x16_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad8x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - - lea rcx, [rcx+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x16sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - paddw mm7, mm2 - cmp rsi, rcx - - jne .x8x16sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad8x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x8x8sad_mmx_loop: - - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - punpcklbw mm0, mm6 - - punpckhbw mm2, mm6 - paddw mm0, mm2 - - lea rsi, [rsi+rax] - add rdi, rdx - - paddw mm7, mm0 - cmp rsi, rcx - - jne .x8x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad4x4_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movq mm2, mm0 - psubusb mm0, mm1 - - psubusb mm1, mm2 - por mm0, mm1 - - movq mm2, mm0 - pxor mm3, mm3 - - punpcklbw mm0, mm3 - punpckhbw mm2, mm3 - - paddw mm0, mm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movd mm4, DWORD PTR [rsi] - movd mm5, DWORD PTR [rdi] - - movd mm6, DWORD PTR [rsi+rax] - movd mm7, DWORD PTR [rdi+rdx] - - punpcklbw mm4, mm6 - punpcklbw mm5, mm7 - - movq mm6, mm4 - psubusb mm4, mm5 - - psubusb mm5, mm6 - por mm4, mm5 - - movq mm5, mm4 - punpcklbw mm4, mm3 - - punpckhbw mm5, mm3 - paddw mm4, mm5 - - paddw mm0, mm4 - movq mm1, mm0 - - punpcklwd mm0, mm3 - punpckhwd mm1, mm3 - - paddw mm0, mm1 - movq mm1, mm0 - - psrlq mm0, 32 - paddw mm0, mm1 - - movq rax, mm0 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;unsigned int vp9_sad16x8_mmx( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride) -sym(vp9_sad16x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rax*8] - pxor mm7, mm7 - - pxor mm6, mm6 - -.x16x8sad_mmx_loop: - - movq mm0, [rsi] - movq mm1, [rdi] - - movq mm2, [rsi+8] - movq mm3, [rdi+8] - - movq mm4, mm0 - movq mm5, mm2 - - psubusb mm0, mm1 - psubusb mm1, mm4 - - psubusb mm2, mm3 - psubusb mm3, mm5 - - por mm0, mm1 - por mm2, mm3 - - movq mm1, mm0 - movq mm3, mm2 - - punpcklbw mm0, mm6 - punpckhbw mm1, mm6 - - punpcklbw mm2, mm6 - punpckhbw mm3, mm6 - - - paddw mm0, mm2 - paddw mm1, mm3 - - paddw mm0, mm1 - lea rsi, [rsi+rax] - - add rdi, rdx - paddw mm7, mm0 - - cmp rsi, rcx - jne .x16x8sad_mmx_loop - - movq mm0, mm7 - punpcklwd mm0, mm6 - - punpckhwd mm7, mm6 - paddw mm0, mm7 - - movq mm7, mm0 - psrlq mm0, 32 - - paddw mm7, mm0 - movq rax, mm7 - - pop rdi - pop rsi - mov rsp, rbp - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index 3bcecc2b4..312f71700 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -93,7 +93,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h -VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm