x86: huffyuvdsp: add_hfyu_left_pred_bgr32
Cycles: C 3092, MMX 1053, SSE2 578

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
7be79c76d3
commit
f743fa9c7f
@ -1,6 +1,7 @@
|
||||
;******************************************************************************
|
||||
;* SIMD-optimized HuffYUV functions
|
||||
;* Copyright (c) 2008 Loren Merritt
|
||||
;* Copyright (c) 2014 Christophe Gisquet
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
@ -222,3 +223,41 @@ INIT_MMX mmx
|
||||
ADD_BYTES
|
||||
INIT_XMM sse2
|
||||
ADD_BYTES
|
||||
|
||||
; Left prediction over packed 4-byte pixels: every output pixel is the
; byte-wise wrapping sum (paddb) of the matching source pixel and the
; previous output pixel; the first pixel is predicted from the 4 bytes
; at *left. On return *left holds the 4 bytes stored just below
; dst + 4*w.
;
; The loop counter is tested at the bottom, so the body runs at least
; once, and each pass loads and stores a full mmsize-byte vector with no
; tail handling: when 4*w is not a multiple of mmsize the final pass
; touches bytes beyond 4*w in both src and dst.
; NOTE(review): callers presumably pass w > 0 and padded buffers - confirm.
;
; LSHIFT, cglobal, REP_RET and q3333 are defined outside this view; the
; comments below assume LSHIFT shifts the whole register towards the high
; end by the given number of bytes - TODO confirm.
;
; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
|
||||
; intptr_t w, uint8_t *left)
|
||||
%macro LEFT_BGR32 0
|
||||
cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left ; arguments are used below as dstq/srcq/wq/leftq
|
||||
shl wq, 2 ; w *= 4: pixel count -> byte count
|
||||
movd m0, [leftq] ; low dword of m0 = the 4 bytes at *left
|
||||
lea dstq, [dstq + wq] ; dst -> one past the last output byte
|
||||
lea srcq, [srcq + wq] ; src -> one past the last input byte
|
||||
LSHIFT m0, mmsize-4 ; park the left pixel in the top dword of m0
|
||||
neg wq ; wq = -4*w, counts up towards zero
|
||||
.loop:
|
||||
movu m1, [srcq+wq] ; unaligned load of mmsize source bytes
|
||||
mova m2, m1 ; keep an unshifted copy
|
||||
%if mmsize == 8
|
||||
punpckhdq m0, m0 ; 8-byte build: copy the high dword (previous pixel) into both dwords
|
||||
%endif
|
||||
LSHIFT m1, 4 ; slide the vector up by one pixel
|
||||
paddb m1, m2 ; each pixel += its lower neighbour
|
||||
%if mmsize == 16
|
||||
pshufd m0, m0, q3333 ; 16-byte build: presumably broadcasts the top dword (previous pixel) - confirm q3333
|
||||
mova m2, m1 ; copy of the pairwise sums
|
||||
LSHIFT m1, 8 ; slide the vector up by two pixels
|
||||
paddb m1, m2 ; m1 = running sums across the four pixels of this vector
|
||||
%endif
|
||||
paddb m0, m1 ; add the carried-in previous pixel to every running sum
|
||||
movu [dstq+wq], m0 ; unaligned store; m0's top dword carries into the next pass
|
||||
add wq, mmsize ; advance by one vector
|
||||
jl .loop ; keep going while wq is still negative
|
||||
movd m0, [dstq-4] ; reload the 4 bytes just below the end of dst
|
||||
movd [leftq], m0 ; hand them back to the caller through *left
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
; Expand LEFT_BGR32 twice, once after selecting the MMX instruction set and
; once after selecting SSE2. The macro body branches on mmsize (8 vs 16),
; which these INIT_* lines presumably set - confirm against the INIT_MMX /
; INIT_XMM definitions, which are outside this view.
INIT_MMX mmx
|
||||
LEFT_BGR32
|
||||
INIT_XMM sse2
|
||||
LEFT_BGR32
|
||||
|
@ -41,6 +41,11 @@ int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
|
||||
int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
|
||||
intptr_t w, int left);
|
||||
|
||||
/* Prototypes for the MMX and SSE2 variants of add_hfyu_left_pred_bgr32.
 * ff_huffyuvdsp_init_x86() assigns them to c->add_hfyu_left_pred_bgr32.
 * Both return void and take `left` as a pointer, unlike the
 * ff_add_hfyu_left_pred_sse4 prototype, which returns int and takes
 * `left` by value. */
void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
|
||||
intptr_t w, uint8_t *left);
|
||||
void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
|
||||
intptr_t w, uint8_t *left);
|
||||
|
||||
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
@ -50,8 +55,10 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
|
||||
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
|
||||
#endif
|
||||
|
||||
if (EXTERNAL_MMX(cpu_flags))
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
c->add_bytes = ff_add_bytes_mmx;
|
||||
c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
|
||||
}
|
||||
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
/* slower than cmov version on AMD */
|
||||
@ -62,6 +69,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->add_bytes = ff_add_bytes_sse2;
|
||||
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
|
||||
c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user