vf_blend: Add SSE2 optimization for multiply
5 times faster than the C implementation; 3 times faster overall.
This commit is contained in:
parent
a25c5dbb5e
commit
253209ac44
@ -24,6 +24,7 @@
|
|||||||
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
|
|
||||||
|
pw_1: times 8 dw 1
|
||||||
pw_128: times 8 dw 128
|
pw_128: times 8 dw 128
|
||||||
pw_255: times 8 dw 255
|
pw_255: times 8 dw 255
|
||||||
pb_127: times 16 db 127
|
pb_127: times 16 db 127
|
||||||
@ -101,6 +102,34 @@ BLEND_INIT difference128, 4
|
|||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
|
; multiply blend: dst = top * bottom / 255, computed per byte.
; Each iteration handles mmsize / 2 bytes: the bytes are widened to 16-bit
; words so the 8x8-bit product (at most 255 * 255 = 65025) fits in one word.
; The division by 255 uses no divide instruction:
;     t = a * b + 1;  result = (t + (t >> 8)) >> 8
; which equals floor(a * b / 255) for every pair of 8-bit inputs.
; Register roles: m0 = top / result, m1 = bottom / scratch,
;                 m2 = all zeros,    m3 = words of 1 (pw_1).
; NOTE(review): BLEND_INIT / BLEND_END are defined outside this view. The
; "4" presumably is the number of SIMD registers used (m0-m3), and BLEND_END
; presumably advances to the next row and jumps back to .nextrow - confirm.
BLEND_INIT multiply, 4
|
||||||
|
pxor       m2, m2 ; m2 = 0, high byte for the byte -> word unpack below
|
||||||
|
mova       m3, [pw_1] ; m3 = 1 in every word (rounding term for the /255 step)
|
||||||
|
.nextrow:
|
||||||
|
mov        xq, widthq ; restart the column index for this row
|
||||||
|
|
||||||
|
.loop:
|
||||||
|
;     word (legend: the bar below spans one 16-bit word in the diagrams)
|
||||||
|
;     |--|
|
||||||
|
movh            m0, [topq + xq]         ; 0000xxxx  load top bytes into the low half
|
||||||
|
movh            m1, [bottomq + xq] ; same layout for the bottom bytes
|
||||||
|
punpcklbw       m0, m2                  ; 00xx00xx  zero-extend top bytes to words
|
||||||
|
punpcklbw       m1, m2 ; zero-extend bottom bytes to words
|
||||||
|
|
||||||
|
pmullw          m0, m1                  ; xxxxxxxx  a * b (<= 65025, fits in a word)
|
||||||
|
paddw           m0, m3 ; t = a * b + 1
|
||||||
|
mova            m1, m0 ; copy t
|
||||||
|
psrlw           m1, 8 ; m1 = t >> 8
|
||||||
|
paddw           m0, m1 ; m0 = t + (t >> 8)
|
||||||
|
psrlw           m0, 8                   ; 00xx00xx  a * b / 255 (exact floor)
|
||||||
|
|
||||||
|
packuswb        m0, m0                  ; 0000xxxx  words back to bytes (values <= 255, so no saturation)
|
||||||
|
movh   [dstq + xq], m0 ; store the low half of m0 to dst
|
||||||
|
add             xq, mmsize / 2 ; advance by the bytes handled this iteration
|
||||||
|
|
||||||
|
jl .loop ; repeat while xq is still negative after the add (presumably BLEND_INIT negates width - confirm)
|
||||||
|
BLEND_END
|
||||||
|
|
||||||
BLEND_INIT average, 3
|
BLEND_INIT average, 3
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.nextrow:
|
.nextrow:
|
||||||
|
@ -36,6 +36,7 @@ BLEND_FUNC(average, sse2)
|
|||||||
BLEND_FUNC(and, sse2)
|
BLEND_FUNC(and, sse2)
|
||||||
BLEND_FUNC(darken, sse2)
|
BLEND_FUNC(darken, sse2)
|
||||||
BLEND_FUNC(difference128, sse2)
|
BLEND_FUNC(difference128, sse2)
|
||||||
|
BLEND_FUNC(multiply, sse2)
|
||||||
BLEND_FUNC(hardmix, sse2)
|
BLEND_FUNC(hardmix, sse2)
|
||||||
BLEND_FUNC(lighten, sse2)
|
BLEND_FUNC(lighten, sse2)
|
||||||
BLEND_FUNC(or, sse2)
|
BLEND_FUNC(or, sse2)
|
||||||
@ -61,6 +62,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
|
|||||||
case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
|
case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
|
||||||
case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
|
case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
|
||||||
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
|
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
|
||||||
|
case BLEND_MULTIPLY: param->blend = ff_blend_multiply_sse2; break;
|
||||||
case BLEND_OR: param->blend = ff_blend_or_sse2; break;
|
case BLEND_OR: param->blend = ff_blend_or_sse2; break;
|
||||||
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
|
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
|
||||||
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
|
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user