Speed up h_predictor_4x4

Rewrite h_predictor_4x4 to use XMM registers.
This speeds it up by ~25% as measured by ./test_intra_pred_speed.

Change-Id: Id01c34c48e75b9d56dfc2e93af12cf0c0326a279
This commit is contained in:
Jian Zhou 2015-11-19 11:34:22 -08:00
parent f3f6b6fe3e
commit d76032ae87

View File

@@ -33,23 +33,20 @@ sh_b2333: db 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
SECTION .text
; h_predictor_4x4: writes four rows of 4 bytes to dst, each row filled with
; a single byte taken from `left`; dst advances by `stride` per row.
;
; NOTE(review): this listing looks like a diff hunk whose +/- markers were
; stripped, so two versions of the body are interleaved below. Presumably the
; looped pshufb sequence is the removed code and the punpcklbw/psrldq
; sequence is the added code -- confirm against the real file before
; assembling this text as-is.
; NOTE(review): cglobal, movifnidn, REP_RET and RET are macros defined
; outside this view (presumably x86inc.asm) -- their exact expansion is not
; visible here.
INIT_MMX ssse3
INIT_XMM ssse3
cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left
movifnidn leftq, leftmp
; --- looped sequence: two rows per iteration, two iterations ---
; left is advanced by 4 and line counts -2, -1, so [leftq+lineq*2] addresses
; offsets 0 and 2 from the original left pointer, and the +1 form addresses
; offsets 1 and 3.
add leftq, 4
mov lineq, -2
; m0 = 0 serves as the pshufb control mask: an all-zero mask copies source
; byte 0 into every destination byte.
pxor m0, m0
.loop:
movd m1, [leftq+lineq*2 ]
movd m2, [leftq+lineq*2+1]
pshufb m1, m0
pshufb m2, m0
movd [dstq ], m1
movd [dstq+strideq], m2
; --- straight-line sequence: builds all four rows in one register ---
; Load 4 bytes b0..b3 from left. Interleaving the register with itself twice
; replicates every byte four times: b0 x4, b1 x4, b2 x4, b3 x4.
movd m0, [leftq]
punpcklbw m0, m0
punpcklbw m0, m0
; The low dword is row 0; each 4-byte right shift exposes the next row.
movd [dstq ], m0
psrldq m0, 4
movd [dstq+strideq], m0
; Advance dst by two rows (this line is used by both sequences).
lea dstq, [dstq+strideq*2]
; Loop tail of the looped sequence: exits once line reaches zero.
inc lineq
jnz .loop
REP_RET
; Rows 2 and 3 of the straight-line sequence.
psrldq m0, 4
movd [dstq ], m0
psrldq m0, 4
movd [dstq+strideq], m0
RET
INIT_MMX ssse3
cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left