4369b9dc7b
Modeled from the prores version. Clips to [0;1023] and is bitexact. Bitexactness requires adding offsets in different places compared to prores or C, and makes the function approximately 2% slower. For 16 frames of a DNxHD 4:2:2 10-bit test sequence: C: 60861 decicycles in idct, 1048205 runs, 371 skips; sse2: 27567 decicycles in idct, 1048216 runs, 360 skips; avx: 26272 decicycles in idct, 1048171 runs, 405 skips. The add version is not implemented, so the corresponding dsp function is set to NULL to make this obvious to any code that tries to execute it. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
54 lines
1.5 KiB
NASM
54 lines
1.5 KiB
NASM
;******************************************************************************
|
|
;* x86-SIMD-optimized IDCT for prores
|
|
;* this is identical to "simple" IDCT written by Michael Niedermayer
|
|
;* except for the clip range
|
|
;*
|
|
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
|
|
;*
|
|
;* This file is part of FFmpeg.
|
|
;*
|
|
;* FFmpeg is free software; you can redistribute it and/or
|
|
;* modify it under the terms of the GNU Lesser General Public
|
|
;* License as published by the Free Software Foundation; either
|
|
;* version 2.1 of the License, or (at your option) any later version.
|
|
;*
|
|
;* FFmpeg is distributed in the hope that it will be useful,
|
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;* Lesser General Public License for more details.
|
|
;*
|
|
;* You should have received a copy of the GNU Lesser General Public
|
|
;* License along with FFmpeg; if not, write to the Free Software
|
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
;******************************************************************************
|
|
|
|
%include "libavutil/x86/x86util.asm"
|
|
|
|
%if ARCH_X86_64
|
|
|
|
SECTION_RODATA

; Word constants passed to IDCT_FN by the idct_fn macro in this file.
; Only pw_88 is defined here; pw_1, pw_4 and pw_1019 are declared external
; through cextern and must be provided by another object file.
;
; NOTE(review): the label says "88" but every word holds 0x2008 (8200 decimal,
; i.e. 0x2000 + 8). Presumably this folds two biases into one constant --
; confirm against the IDCT_FN template before renaming or changing it.
pw_88:  times 8 dw 0x2008               ; eight 16-bit words, each 0x2008
cextern pw_1
cextern pw_4
cextern pw_1019
|
|
|
|
%include "libavcodec/x86/simple_idct10_template.asm"
|
|
|
|
section .text align=16

;------------------------------------------------------------------------------
; idct_fn (no macro parameters)
;
; Emits one prores_idct_put_10 function. The macro is expanded after an
; INIT_XMM line, so each expansion is built for the instruction set selected
; there.
;
; The whole function body comes from IDCT_FN, which is not defined in this
; file (presumably supplied by the included simple_idct10_template.asm).
; Operands passed to it, in order:
;   pw_1, 15     NOTE(review): presumably the first-pass bias and shift
;   pw_88, 18    NOTE(review): presumably the second-pass bias and shift
;   pw_4         NOTE(review): presumably the lower clip bound
;   pw_1019      NOTE(review): presumably the upper clip bound
;   r3           fourth function argument register; presumably the
;                quantisation matrix -- confirm against the template
;
; cglobal operands "4, 4, 15": under the usual x86inc convention these are
; the argument count, the number of general-purpose registers and the number
; of XMM registers used -- confirm against x86inc.asm.
;------------------------------------------------------------------------------
%macro idct_fn 0
cglobal prores_idct_put_10, 4, 4, 15
    IDCT_FN     pw_1, 15, pw_88, 18, pw_4, pw_1019, r3
    RET
%endmacro
|
|
|
|
; Expand idct_fn once for SSE2, and once more for AVX when the build
; configuration sets HAVE_AVX_EXTERNAL.
INIT_XMM sse2
idct_fn
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
idct_fn
%endif
|
|
|
|
%endif
|