x86: hevc/sao: aligned source buffers
Useful for at least the band filter, for which the measurements (columns: 32, 64) are:
- Band filter call only: before 16556 / 54015, after 16497 / 52355
- Whole case: before 37031 / 103008, after 32045 / 93952
This commit is contained in:
parent
fa3eccb4f9
commit
bff7feb328
@ -284,12 +284,12 @@ static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
|
||||
{
|
||||
int ret, i;
|
||||
|
||||
frame->width = s->avctx->coded_width + 2;
|
||||
frame->height = s->avctx->coded_height + 2;
|
||||
frame->width = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE);
|
||||
frame->height = s->avctx->coded_height + 3;
|
||||
if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
|
||||
return ret;
|
||||
for (i = 0; frame->data[i]; i++) {
|
||||
int offset = frame->linesize[i] + (1 << sps->pixel_shift);
|
||||
int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE;
|
||||
frame->data[i] += offset;
|
||||
}
|
||||
frame->width = s->avctx->coded_width;
|
||||
|
@ -104,26 +104,26 @@ align 16
|
||||
|
||||
%assign i 0
|
||||
%rep %2
|
||||
movu m13, [srcq + i]
|
||||
mova m13, [srcq + i]
|
||||
punpcklbw m8, m13, m14
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
|
||||
punpckhbw m13, m14
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
|
||||
packuswb m8, m13
|
||||
movu [dstq + i], m8
|
||||
mova [dstq + i], m8
|
||||
%assign i i+mmsize
|
||||
%endrep
|
||||
|
||||
%if %1 == 48
|
||||
INIT_XMM cpuname
|
||||
|
||||
movu m13, [srcq + i]
|
||||
mova m13, [srcq + i]
|
||||
punpcklbw m8, m13, m14
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
|
||||
punpckhbw m13, m14
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
|
||||
packuswb m8, m13
|
||||
movu [dstq + i], m8
|
||||
mova [dstq + i], m8
|
||||
%assign i i+16
|
||||
%endif ; %1 == 48
|
||||
|
||||
@ -143,37 +143,37 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, dst, src, dststride, srcstride, of
|
||||
align 16
|
||||
.loop
|
||||
%if %2 == 8
|
||||
movu m8, [srcq]
|
||||
mova m8, [srcq]
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
|
||||
CLIPW m8, m14, m13
|
||||
movu [dstq], m8
|
||||
mova [dstq], m8
|
||||
%endif
|
||||
|
||||
%assign i 0
|
||||
%rep %3
|
||||
movu m8, [srcq + i]
|
||||
mova m8, [srcq + i]
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
|
||||
CLIPW m8, m14, m13
|
||||
movu [dstq + i], m8
|
||||
mova [dstq + i], m8
|
||||
|
||||
movu m9, [srcq + i + mmsize]
|
||||
mova m9, [srcq + i + mmsize]
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
|
||||
CLIPW m9, m14, m13
|
||||
movu [dstq + i + mmsize], m9
|
||||
mova [dstq + i + mmsize], m9
|
||||
%assign i i+mmsize*2
|
||||
%endrep
|
||||
|
||||
%if %2 == 48
|
||||
INIT_XMM cpuname
|
||||
movu m8, [srcq + i]
|
||||
mova m8, [srcq + i]
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
|
||||
CLIPW m8, m14, m13
|
||||
movu [dstq + i], m8
|
||||
mova [dstq + i], m8
|
||||
|
||||
movu m9, [srcq + i + mmsize]
|
||||
mova m9, [srcq + i + mmsize]
|
||||
HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
|
||||
CLIPW m9, m14, m13
|
||||
movu [dstq + i + mmsize], m9
|
||||
mova [dstq + i + mmsize], m9
|
||||
%assign i i+32
|
||||
%endif ; %1 == 48
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user