2x faster h264_idct_add8_10.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
parent
cc9947ffbe
commit
994c3550ff
@ -249,16 +249,17 @@ IDCT8_DC_ADD avx
|
||||
jmp .skipadd%2
|
||||
%endmacro
|
||||
|
||||
%assign last_block 16
|
||||
%macro ADD16_OP_INTRA 3
|
||||
cmp word [r4+%3], 0
|
||||
jnz .ac%2
|
||||
mov r6d, dword [r2+ 0]
|
||||
or r6d, dword [r2+64]
|
||||
mov r5d, dword [r2+ 0]
|
||||
or r5d, dword [r2+64]
|
||||
jz .skipblock%2
|
||||
mov r5d, dword [r1+(%2+0)*4]
|
||||
mov r5d, dword [r1+(%2+0)*4]
|
||||
call idct_dc_add_%1
|
||||
.skipblock%2:
|
||||
%if %2<15
|
||||
%if %2<last_block-2
|
||||
add r2, 128
|
||||
%endif
|
||||
.skipadd%2:
|
||||
@ -302,47 +303,33 @@ INIT_AVX
|
||||
IDCT_ADD16INTRA_10 avx
|
||||
%endif
|
||||
|
||||
%assign last_block 24
|
||||
;-----------------------------------------------------------------------------
|
||||
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro IDCT_ADD8 1
|
||||
cglobal h264_idct_add8_10_%1,5,7
|
||||
mov r5, 16
|
||||
add r2, 1024
|
||||
%ifdef PIC
|
||||
lea r11, [scan8_mem]
|
||||
%endif
|
||||
%ifdef ARCH_X86_64
|
||||
mov r10, r0
|
||||
mov r10, r0
|
||||
%endif
|
||||
.nextblock:
|
||||
movzx r6, byte [scan8+r5]
|
||||
movzx r6, byte [r4+r6]
|
||||
or r6d, dword [r2]
|
||||
test r6, r6
|
||||
jz .skipblock
|
||||
add r2, 1024
|
||||
mov r0, [r0]
|
||||
ADD16_OP_INTRA %1, 16, 1+1*8
|
||||
ADD16_OP_INTRA %1, 18, 1+2*8
|
||||
%ifdef ARCH_X86_64
|
||||
mov r0d, dword [r1+r5*4]
|
||||
add r0, [r10]
|
||||
mov r0, [r10+gprsize]
|
||||
%else
|
||||
mov r0, r0m
|
||||
mov r0, [r0]
|
||||
add r0, dword [r1+r5*4]
|
||||
mov r0, r0m
|
||||
mov r0, [r0+gprsize]
|
||||
%endif
|
||||
IDCT4_ADD_10 r0, r2, r3
|
||||
.skipblock:
|
||||
inc r5
|
||||
add r2, 64
|
||||
test r5, 3
|
||||
jnz .nextblock
|
||||
%ifdef ARCH_X86_64
|
||||
add r10, gprsize
|
||||
%else
|
||||
add r0mp, gprsize
|
||||
%endif
|
||||
test r5, 4
|
||||
jnz .nextblock
|
||||
ADD16_OP_INTRA %1, 20, 1+4*8
|
||||
ADD16_OP_INTRA %1, 22, 1+5*8
|
||||
REP_RET
|
||||
AC %1, 16
|
||||
AC %1, 18
|
||||
AC %1, 20
|
||||
AC %1, 22
|
||||
|
||||
%endmacro ; IDCT_ADD8
|
||||
|
||||
INIT_XMM
|
||||
|
Loading…
x
Reference in New Issue
Block a user