Merge changes I0139f8e9,I7d2545fc
* changes:
  Code clean of highbd_tm_predictor_16x16
  Code clean of highbd_dc_predictor_32x32
This commit is contained in:
commit
dbe2d8c33c
@ -162,6 +162,10 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
|||||||
#else
|
#else
|
||||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
|
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||||
|
&vpx_highbd_dc_predictor_32x32_c, 32, 8),
|
||||||
|
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
|
||||||
|
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
|
||||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||||
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
|
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
|
||||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||||
@ -218,6 +222,12 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
|||||||
#else
|
#else
|
||||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
|
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||||
|
&vpx_highbd_dc_predictor_32x32_c, 32,
|
||||||
|
10),
|
||||||
|
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
|
||||||
|
&vpx_highbd_tm_predictor_16x16_c, 16,
|
||||||
|
10),
|
||||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||||
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
|
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
|
||||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||||
@ -275,6 +285,12 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
|||||||
#else
|
#else
|
||||||
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
|
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
|
||||||
|
&vpx_highbd_dc_predictor_32x32_c, 32,
|
||||||
|
12),
|
||||||
|
make_tuple(&vpx_highbd_tm_predictor_16x16_sse2,
|
||||||
|
&vpx_highbd_tm_predictor_16x16_c, 16,
|
||||||
|
12),
|
||||||
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
|
||||||
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
|
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
|
||||||
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
|
||||||
|
@ -387,7 +387,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
|
specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||||
specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
|
specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||||
specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
|
specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
|
||||||
@ -438,7 +438,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
|
specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||||
specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
|
specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||||
specialize qw/vpx_highbd_dc_top_predictor_32x32/;
|
specialize qw/vpx_highbd_dc_top_predictor_32x32/;
|
||||||
|
@ -118,30 +118,29 @@ cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
|
|||||||
RESTORE_GOT
|
RESTORE_GOT
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
%if ARCH_X86_64
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
|
cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
|
||||||
GET_GOT goffsetq
|
GET_GOT goffsetq
|
||||||
|
|
||||||
pxor m1, m1
|
|
||||||
mova m0, [aboveq]
|
mova m0, [aboveq]
|
||||||
mova m2, [aboveq+16]
|
mova m2, [aboveq+16]
|
||||||
mova m3, [aboveq+32]
|
mova m3, [aboveq+32]
|
||||||
mova m4, [aboveq+48]
|
mova m4, [aboveq+48]
|
||||||
mova m5, [leftq]
|
paddw m0, m2
|
||||||
mova m6, [leftq+16]
|
paddw m3, m4
|
||||||
mova m7, [leftq+32]
|
mova m2, [leftq]
|
||||||
mova m8, [leftq+48]
|
mova m4, [leftq+16]
|
||||||
|
mova m5, [leftq+32]
|
||||||
|
mova m6, [leftq+48]
|
||||||
|
paddw m2, m4
|
||||||
|
paddw m5, m6
|
||||||
|
paddw m0, m3
|
||||||
|
paddw m2, m5
|
||||||
|
pxor m1, m1
|
||||||
|
paddw m0, m2
|
||||||
DEFINE_ARGS dst, stride, stride3, lines4
|
DEFINE_ARGS dst, stride, stride3, lines4
|
||||||
lea stride3q, [strideq*3]
|
lea stride3q, [strideq*3]
|
||||||
mov lines4d, 8
|
mov lines4d, 8
|
||||||
paddw m0, m2
|
|
||||||
paddw m0, m3
|
|
||||||
paddw m0, m4
|
|
||||||
paddw m0, m5
|
|
||||||
paddw m0, m6
|
|
||||||
paddw m0, m7
|
|
||||||
paddw m0, m8
|
|
||||||
movhlps m2, m0
|
movhlps m2, m0
|
||||||
paddw m0, m2
|
paddw m0, m2
|
||||||
punpcklwd m0, m1
|
punpcklwd m0, m1
|
||||||
@ -177,7 +176,6 @@ cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
|
|||||||
|
|
||||||
RESTORE_GOT
|
RESTORE_GOT
|
||||||
REP_RET
|
REP_RET
|
||||||
%endif
|
|
||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
|
cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
|
||||||
@ -340,61 +338,54 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
|
|||||||
jnz .loop
|
jnz .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
%if ARCH_X86_64
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
cglobal highbd_tm_predictor_16x16, 5, 6, 9, dst, stride, above, left, bps, one
|
cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps
|
||||||
movd m2, [aboveq-2]
|
movd m2, [aboveq-2]
|
||||||
mova m0, [aboveq]
|
mova m0, [aboveq]
|
||||||
mova m1, [aboveq+16]
|
mova m1, [aboveq+16]
|
||||||
pshuflw m2, m2, 0x0
|
pshuflw m2, m2, 0x0
|
||||||
; Get the values to compute the maximum value at this bit depth
|
; Get the values to compute the maximum value at this bit depth
|
||||||
mov oned, 1
|
pcmpeqw m3, m3
|
||||||
pxor m7, m7
|
movd m4, bpsd
|
||||||
pxor m8, m8
|
|
||||||
pinsrw m7, oned, 0
|
|
||||||
pinsrw m8, bpsd, 0
|
|
||||||
pshuflw m7, m7, 0x0
|
|
||||||
DEFINE_ARGS dst, stride, line, left
|
|
||||||
punpcklqdq m7, m7
|
|
||||||
mov lineq, -8
|
|
||||||
mova m5, m7
|
|
||||||
punpcklqdq m2, m2
|
punpcklqdq m2, m2
|
||||||
psllw m7, m8
|
psllw m3, m4
|
||||||
add leftq, 32
|
pcmpeqw m5, m5
|
||||||
psubw m7, m5 ; max possible value
|
pxor m4, m4 ; min possible value
|
||||||
pxor m8, m8 ; min possible value
|
pxor m3, m5 ; max possible value
|
||||||
|
DEFINE_ARGS dst, stride, line, left
|
||||||
|
mov lineq, -8
|
||||||
psubw m0, m2
|
psubw m0, m2
|
||||||
psubw m1, m2
|
psubw m1, m2
|
||||||
.loop:
|
.loop:
|
||||||
movd m2, [leftq+lineq*4]
|
movd m7, [leftq]
|
||||||
movd m3, [leftq+lineq*4+2]
|
pshuflw m5, m7, 0x0
|
||||||
pshuflw m2, m2, 0x0
|
pshuflw m2, m7, 0x55
|
||||||
pshuflw m3, m3, 0x0
|
punpcklqdq m5, m5 ; l1 l1 l1 l1 l1 l1 l1 l1
|
||||||
punpcklqdq m2, m2
|
punpcklqdq m2, m2 ; l2 l2 l2 l2 l2 l2 l2 l2
|
||||||
punpcklqdq m3, m3
|
paddw m6, m5, m0 ; t1-tl+l1 to t4-tl+l1
|
||||||
paddw m4, m2, m0
|
paddw m5, m1 ; t5-tl+l1 to t8-tl+l1
|
||||||
paddw m5, m3, m0
|
pminsw m6, m3
|
||||||
|
pminsw m5, m3
|
||||||
|
pmaxsw m6, m4 ; Clamp to the bit-depth
|
||||||
|
pmaxsw m5, m4
|
||||||
|
mova [dstq ], m6
|
||||||
|
mova [dstq +16], m5
|
||||||
|
paddw m6, m2, m0
|
||||||
paddw m2, m1
|
paddw m2, m1
|
||||||
paddw m3, m1
|
pminsw m6, m3
|
||||||
;Clamp to the bit-depth
|
pminsw m2, m3
|
||||||
pminsw m4, m7
|
pmaxsw m6, m4
|
||||||
pminsw m5, m7
|
pmaxsw m2, m4
|
||||||
pminsw m2, m7
|
mova [dstq+strideq*2 ], m6
|
||||||
pminsw m3, m7
|
mova [dstq+strideq*2+16], m2
|
||||||
pmaxsw m4, m8
|
|
||||||
pmaxsw m5, m8
|
|
||||||
pmaxsw m2, m8
|
|
||||||
pmaxsw m3, m8
|
|
||||||
;Store the values
|
|
||||||
mova [dstq ], m4
|
|
||||||
mova [dstq+strideq*2 ], m5
|
|
||||||
mova [dstq +16], m2
|
|
||||||
mova [dstq+strideq*2+16], m3
|
|
||||||
lea dstq, [dstq+strideq*4]
|
lea dstq, [dstq+strideq*4]
|
||||||
inc lineq
|
inc lineq
|
||||||
|
lea leftq, [leftq+4]
|
||||||
|
|
||||||
jnz .loop
|
jnz .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
|
cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
|
||||||
movd m0, [aboveq-2]
|
movd m0, [aboveq-2]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user