Speed up h_predictor_4x4

Rewrite h_predictor_4x4 to use XMM registers.
This speeds it up by ~25% as measured by ./test_intra_pred_speed.

Change-Id: Id01c34c48e75b9d56dfc2e93af12cf0c0326a279
This commit is contained in:
Jian Zhou 2015-11-19 11:34:22 -08:00
parent f3f6b6fe3e
commit d76032ae87

View File

@@ -33,23 +33,20 @@ sh_b2333: db 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
SECTION .text SECTION .text
INIT_MMX ssse3 INIT_XMM ssse3
cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left
movifnidn leftq, leftmp movd m0, [leftq]
add leftq, 4 punpcklbw m0, m0
mov lineq, -2 punpcklbw m0, m0
pxor m0, m0 movd [dstq ], m0
.loop: psrldq m0, 4
movd m1, [leftq+lineq*2 ] movd [dstq+strideq], m0
movd m2, [leftq+lineq*2+1]
pshufb m1, m0
pshufb m2, m0
movd [dstq ], m1
movd [dstq+strideq], m2
lea dstq, [dstq+strideq*2] lea dstq, [dstq+strideq*2]
inc lineq psrldq m0, 4
jnz .loop movd [dstq ], m0
REP_RET psrldq m0, 4
movd [dstq+strideq], m0
RET
INIT_MMX ssse3 INIT_MMX ssse3
cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left