Merge "Code clean of sad4xNx4D_sse"

James Zern 2016-01-25 20:57:15 +00:00 committed by Gerrit Code Review
commit 3a2ad10de2
3 changed files with 36 additions and 36 deletions

test/sad_test.cc

@@ -700,16 +700,6 @@ const SadMxNParam mmx_tests[] = {
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#endif // HAVE_MMX
#if HAVE_SSE
#if CONFIG_USE_X86INC
const SadMxNx4Param x4d_sse_tests[] = {
make_tuple(4, 8, &vpx_sad4x8x4d_sse, -1),
make_tuple(4, 4, &vpx_sad4x4x4d_sse, -1),
};
INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSE
#if HAVE_SSE2
#if CONFIG_USE_X86INC
const SadMxNParam sse2_tests[] = {
@@ -828,6 +818,8 @@ const SadMxNx4Param x4d_sse2_tests[] = {
make_tuple(8, 16, &vpx_sad8x16x4d_sse2, -1),
make_tuple(8, 8, &vpx_sad8x8x4d_sse2, -1),
make_tuple(8, 4, &vpx_sad8x4x4d_sse2, -1),
make_tuple(4, 8, &vpx_sad4x8x4d_sse2, -1),
make_tuple(4, 4, &vpx_sad4x4x4d_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 8),
make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 8),
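
Each SADx4Test entry pairs a block size with the kernel under test: make_tuple(4, 8, &vpx_sad4x8x4d_sse2, -1) runs the new 4x8 SSE2 kernel, with -1 selecting the plain 8-bit path (the highbd entries pass an explicit bit depth instead). As a rough sketch of what such a test case exercises, assuming the generated ./vpx_dsp_rtcd.h is on the include path, a direct call could look like the following; the helper name, buffer sizes, strides and contents are made up for illustration:

#include <stdint.h>
#include <string.h>
#include "./vpx_dsp_rtcd.h"  /* declares vpx_sad4x8x4d_sse2() */

static void call_sad4x8x4d_example(void) {
  uint8_t src[8 * 4];                       /* one 4x8 source block, stride 4 */
  uint8_t ref0[8 * 4], ref1[8 * 4], ref2[8 * 4], ref3[8 * 4];
  const uint8_t *const refs[4] = { ref0, ref1, ref2, ref3 };
  uint32_t sad[4];                          /* one SAD per reference block */

  memset(src, 0, sizeof(src));
  memset(ref0, 1, sizeof(ref0));
  memset(ref1, 2, sizeof(ref1));
  memset(ref2, 3, sizeof(ref2));
  memset(ref3, 4, sizeof(ref3));
  vpx_sad4x8x4d_sse2(src, 4, refs, 4, sad);
  /* sad[0..3] now hold the SADs against ref0..ref3: 32, 64, 96, 128. */
}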

vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -1156,10 +1156,10 @@ add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc";
specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
#
# Structured Similarity (SSIM)
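
For the 4x8 and 4x4 x4d kernels the specialize line now names "$sse2_x86inc" instead of "$sse_x86inc", so the run-time dispatch picks the new SSE2 code; the add_proto prototype is unchanged. Whatever the instruction set, every vpx_sadMxNx4d variant keeps the contract of the C reference in vpx_dsp/sad.c: compute the SAD of one MxN source block against each of four reference blocks. A minimal scalar sketch for the 4x4 case, with the function name chosen for illustration rather than taken from the library:

#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of the vpx_sad4x4x4d contract: for each of the four
 * reference blocks, sum |src - ref| over the 4x4 block. */
static void sad4x4x4d_sketch(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *const ref_ptr[4], int ref_stride,
                             uint32_t *sad_array) {
  int i, r, c;
  for (i = 0; i < 4; ++i) {
    uint32_t sad = 0;
    for (r = 0; r < 4; ++r) {
      for (c = 0; c < 4; ++c) {
        sad += (uint32_t)abs(src_ptr[r * src_stride + c] -
                             ref_ptr[i][r * ref_stride + c]);
      }
    }
    sad_array[i] = sad;
  }
}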

vpx_dsp/x86/sad4d_sse2.asm

@@ -20,33 +20,41 @@ SECTION .text
movd m4, [ref2q+%3]
movd m7, [ref3q+%3]
movd m5, [ref4q+%3]
punpckldq m0, [srcq +%4]
punpckldq m6, [ref1q+%5]
punpckldq m4, [ref2q+%5]
punpckldq m7, [ref3q+%5]
punpckldq m5, [ref4q+%5]
movd m1, [srcq +%4]
movd m2, [ref1q+%5]
punpckldq m0, m1
punpckldq m6, m2
movd m1, [ref2q+%5]
movd m2, [ref3q+%5]
movd m3, [ref4q+%5]
punpckldq m4, m1
punpckldq m7, m2
punpckldq m5, m3
movlhps m0, m0
movlhps m6, m4
movlhps m7, m5
psadbw m6, m0
psadbw m4, m0
psadbw m7, m0
psadbw m5, m0
punpckldq m6, m4
punpckldq m7, m5
%else
movd m1, [ref1q+%3]
movd m5, [ref1q+%5]
movd m2, [ref2q+%3]
movd m4, [ref2q+%5]
punpckldq m1, m5
punpckldq m2, m4
movd m3, [ref3q+%3]
movd m5, [ref3q+%5]
punpckldq m3, m5
movd m4, [ref4q+%3]
punpckldq m0, [srcq +%4]
punpckldq m1, [ref1q+%5]
punpckldq m2, [ref2q+%5]
punpckldq m3, [ref3q+%5]
punpckldq m4, [ref4q+%5]
movd m5, [ref4q+%5]
punpckldq m4, m5
movd m5, [srcq +%4]
punpckldq m0, m5
movlhps m0, m0
movlhps m1, m2
movlhps m3, m4
psadbw m1, m0
psadbw m2, m0
psadbw m3, m0
psadbw m4, m0
punpckldq m1, m2
punpckldq m3, m4
paddd m6, m1
paddd m7, m3
%endif
@@ -170,7 +178,7 @@ SECTION .text
; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
%macro SADNXN4D 2
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
@@ -192,7 +200,7 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
%endrep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
%if mmsize == 16
%if %1 > 4
pslldq m5, 4
pslldq m7, 4
por m4, m5
@@ -207,8 +215,10 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
RET
%else
movifnidn r4, r4mp
movq [r4+0], m6
movq [r4+8], m7
pshufd m6, m6, 0x08
pshufd m7, m7, 0x08
movq [r4+0], m6
movq [r4+8], m7
RET
%endif
%endmacro
@@ -225,7 +235,5 @@ SADNXN4D 16, 8
SADNXN4D 8, 16
SADNXN4D 8, 8
SADNXN4D 8, 4
INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4
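
With the SSE versions gone, the 4x8 and 4x4 sizes are emitted from the same SSE2 macro list as the larger blocks, and the INIT_MMX sse switch is dropped. The reworked PROCESS_4x2x4 assembles each pair of 4-byte rows with movd/punpckldq, duplicates the source rows across both 64-bit lanes (movlhps m0, m0), packs two references per register (movlhps m6, m4 and movlhps m7, m5), and lets a single psadbw produce two partial SADs per register, one per lane; the pshufd ..., 0x08 in the store path then packs those dword results into the low eight bytes before the movq writes. A rough SSE2-intrinsics sketch of the same pack-and-psadbw idea for one source and two references, illustrative only and not the library's code:

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <string.h>

/* Two 4-byte rows of src vs. the matching rows of ref1 and ref2:
 * ref1 occupies the low 64-bit lane, ref2 the high lane, so one
 * PSADBW yields both partial SADs at once. */
static void sad4x2_two_refs_sketch(const uint8_t *src, int src_stride,
                                   const uint8_t *ref1, const uint8_t *ref2,
                                   int ref_stride, uint32_t sad[2]) {
  uint32_t s0, s1, a0, a1, b0, b1;
  __m128i s, a, b, d;
  memcpy(&s0, src, 4);
  memcpy(&s1, src + src_stride, 4);
  memcpy(&a0, ref1, 4);
  memcpy(&a1, ref1 + ref_stride, 4);
  memcpy(&b0, ref2, 4);
  memcpy(&b1, ref2 + ref_stride, 4);

  /* movd + punpckldq: two rows side by side in the low 8 bytes */
  s = _mm_unpacklo_epi32(_mm_cvtsi32_si128((int)s0), _mm_cvtsi32_si128((int)s1));
  a = _mm_unpacklo_epi32(_mm_cvtsi32_si128((int)a0), _mm_cvtsi32_si128((int)a1));
  b = _mm_unpacklo_epi32(_mm_cvtsi32_si128((int)b0), _mm_cvtsi32_si128((int)b1));

  s = _mm_unpacklo_epi64(s, s);  /* movlhps m0, m0: duplicate the src rows   */
  a = _mm_unpacklo_epi64(a, b);  /* movlhps m6, m4: ref1 low lane, ref2 high */

  d = _mm_sad_epu8(a, s);        /* psadbw: one 8-byte SAD per 64-bit lane   */
  sad[0] = (uint32_t)_mm_cvtsi128_si32(d);                    /* ref1 rows   */
  sad[1] = (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(d, 8)); /* ref2 rows   */
}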