vf_blend: Use integers for divide mode
2.5x faster for 8-bit mode without autovectorization in GCC, 2x slower with it on x86. However, since the platforms we enable GCC autovectorization on most probably has support for SSE2 optimization (added in the subsequent commit), this commit should in general do good.
This commit is contained in:
parent
4b750104ea
commit
a678d66781
@ -247,7 +247,7 @@ DEFINE_BLEND8(hardlight, (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
|
||||
DEFINE_BLEND8(hardmix, (A < (255 - B)) ? 0: 255)
|
||||
DEFINE_BLEND8(darken, FFMIN(A, B))
|
||||
DEFINE_BLEND8(lighten, FFMAX(A, B))
|
||||
DEFINE_BLEND8(divide, av_clip_uint8(((float)A / ((float)B) * 255)))
|
||||
DEFINE_BLEND8(divide, av_clip_uint8(B == 0 ? 255 : 255 * A / B))
|
||||
DEFINE_BLEND8(dodge, DODGE(A, B))
|
||||
DEFINE_BLEND8(burn, BURN(A, B))
|
||||
DEFINE_BLEND8(softlight, (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255))
|
||||
@ -287,7 +287,7 @@ DEFINE_BLEND16(hardlight, (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
|
||||
DEFINE_BLEND16(hardmix, (A < (65535 - B)) ? 0: 65535)
|
||||
DEFINE_BLEND16(darken, FFMIN(A, B))
|
||||
DEFINE_BLEND16(lighten, FFMAX(A, B))
|
||||
DEFINE_BLEND16(divide, av_clip_uint16(((float)A / ((float)B) * 65535)))
|
||||
DEFINE_BLEND16(divide, av_clip_uint16(B == 0 ? 65535 : 65535 * A / B))
|
||||
DEFINE_BLEND16(dodge, DODGE(A, B))
|
||||
DEFINE_BLEND16(burn, BURN(A, B))
|
||||
DEFINE_BLEND16(softlight, (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))
|
||||
|
Loading…
x
Reference in New Issue
Block a user