Merge remote-tracking branch 'qatar/master'

* qatar/master:
  proresdsp: port x86 assembly to cpuflags.
  lavr: x86: improve non-SSE4 version of S16_TO_S32_SX macro
  lavfi: better channel layout negotiation
  alac: check for truncated packets
  alac: reverse lpc coeff order, simplify filter
  lavr: add x86-optimized mixing functions
  x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
  tscc2: fix typo in array index
  build: use COMPILE template for HOSTOBJS
  build: do full flag handling for all compiler-type tools
  eval: fix printing of NaN in eval fate test.
  build: Rename aandct component to more descriptive aandcttables
  mpegaudio: bury inline asm under HAVE_INLINE_ASM.
  x86inc: automatically insert vzeroupper for YMM functions.
  rtmp: Check the buffer length of ping packets
  rtmp: Allow having more unknown data at the end of a chunk size packet without failing
  rtmp: Prevent reading outside of an allocate buffer when receiving server bandwidth packets

Conflicts:
  Makefile
  configure
  libavcodec/x86/proresdsp.asm
  libavutil/eval.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
c6963a220d
15
common.mak
15
common.mak
@ -11,7 +11,7 @@ ifndef V
|
||||
Q = @
|
||||
ECHO = printf "$(1)\t%s\n" $(2)
|
||||
BRIEF = CC CXX AS YASM AR LD HOSTCC STRIP CP
|
||||
SILENT = DEPCC YASMDEP RM RANLIB
|
||||
SILENT = DEPCC DEPAS DEPHOSTCC YASMDEP RM RANLIB
|
||||
MSG = $@
|
||||
M = @$(call ECHO,$(TAG),$@);
|
||||
$(foreach VAR,$(BRIEF), \
|
||||
@ -26,15 +26,16 @@ ALLFFLIBS = avcodec avdevice avfilter avformat avresample avutil postproc swscal
|
||||
IFLAGS := -I. -I$(SRC_PATH)/
|
||||
CPPFLAGS := $(IFLAGS) $(CPPFLAGS)
|
||||
CFLAGS += $(ECFLAGS)
|
||||
CCFLAGS = $(CFLAGS)
|
||||
CCFLAGS = $(CPPFLAGS) $(CFLAGS)
|
||||
ASFLAGS := $(CPPFLAGS) $(ASFLAGS)
|
||||
CXXFLAGS := $(CFLAGS) $(CXXFLAGS)
|
||||
YASMFLAGS += $(IFLAGS) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm
|
||||
HOSTCFLAGS += $(IFLAGS)
|
||||
HOSTCCFLAGS = $(IFLAGS) $(HOSTCFLAGS)
|
||||
LDFLAGS := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS)
|
||||
|
||||
define COMPILE
|
||||
$($(1)DEP)
|
||||
$($(1)) $(CPPFLAGS) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $<
|
||||
$(call $(1)DEP,$(1))
|
||||
$($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $<
|
||||
endef
|
||||
|
||||
COMPILE_C = $(call COMPILE,CC)
|
||||
@ -101,7 +102,7 @@ checkheaders: $(filter-out $(SKIPHEADERS:.h=.ho),$(ALLHEADERS:.h=.ho))
|
||||
alltools: $(TOOLS)
|
||||
|
||||
$(HOSTOBJS): %.o: %.c
|
||||
$(HOSTCC) $(HOSTCFLAGS) -c -o $@ $<
|
||||
$(call COMPILE,HOSTCC)
|
||||
|
||||
$(HOSTPROGS): %$(HOSTEXESUF): %.o
|
||||
$(HOSTCC) $(HOSTLDFLAGS) -o $@ $< $(HOSTLIBS)
|
||||
@ -117,4 +118,4 @@ CLEANSUFFIXES = *.d *.o *~ *.ho *.map *.ver *.gcno *.gcda
|
||||
DISTCLEANSUFFIXES = *.pc
|
||||
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
|
||||
|
||||
-include $(wildcard $(OBJS:.o=.d) $(TESTOBJS:.o=.d))
|
||||
-include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d))
|
||||
|
330
configure
vendored
330
configure
vendored
@ -265,6 +265,7 @@ Optimization options (experts only):
|
||||
--disable-sse disable SSE optimizations
|
||||
--disable-ssse3 disable SSSE3 optimizations
|
||||
--disable-avx disable AVX optimizations
|
||||
--disable-fma4 disable FMA4 optimizations
|
||||
--disable-armv5te disable armv5te optimizations
|
||||
--disable-armv6 disable armv6 optimizations
|
||||
--disable-armv6t2 disable armv6t2 optimizations
|
||||
@ -1173,6 +1174,7 @@ ARCH_EXT_LIST='
|
||||
armv6t2
|
||||
armvfp
|
||||
avx
|
||||
fma4
|
||||
mmi
|
||||
mmx
|
||||
mmx2
|
||||
@ -1336,7 +1338,7 @@ HAVE_LIST="
|
||||
|
||||
# options emitted with CONFIG_ prefix but not available on command line
|
||||
CONFIG_EXTRA="
|
||||
aandct
|
||||
aandcttables
|
||||
avutil
|
||||
golomb
|
||||
gplv3
|
||||
@ -1450,6 +1452,7 @@ mmx2_deps="mmx"
|
||||
sse_deps="mmx"
|
||||
ssse3_deps="sse"
|
||||
avx_deps="ssse3"
|
||||
fma4_deps="avx"
|
||||
|
||||
aligned_stack_if_any="ppc x86"
|
||||
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
|
||||
@ -1477,7 +1480,7 @@ ac3_fixed_encoder_select="mdct ac3dsp"
|
||||
alac_encoder_select="lpc"
|
||||
amrnb_decoder_select="lsp"
|
||||
amrwb_decoder_select="lsp"
|
||||
amv_encoder_select="aandct"
|
||||
amv_encoder_select="aandcttables"
|
||||
atrac1_decoder_select="mdct sinewin"
|
||||
atrac3_decoder_select="mdct"
|
||||
binkaudio_dct_decoder_select="mdct rdft dct sinewin"
|
||||
@ -1487,13 +1490,13 @@ cook_decoder_select="mdct sinewin"
|
||||
cscd_decoder_suggest="zlib"
|
||||
dca_decoder_select="mdct"
|
||||
dirac_decoder_select="dwt golomb"
|
||||
dnxhd_encoder_select="aandct"
|
||||
dnxhd_encoder_select="aandcttables"
|
||||
dxa_decoder_select="zlib"
|
||||
eac3_decoder_select="ac3_decoder"
|
||||
eac3_encoder_select="mdct ac3dsp"
|
||||
eamad_decoder_select="aandct"
|
||||
eatgq_decoder_select="aandct"
|
||||
eatqi_decoder_select="aandct"
|
||||
eamad_decoder_select="aandcttables"
|
||||
eatgq_decoder_select="aandcttables"
|
||||
eatqi_decoder_select="aandcttables"
|
||||
exr_decoder_select="zlib"
|
||||
ffv1_decoder_select="golomb"
|
||||
flac_decoder_select="golomb"
|
||||
@ -1505,9 +1508,9 @@ flashsv2_decoder_select="zlib"
|
||||
flv_decoder_select="h263_decoder"
|
||||
flv_encoder_select="h263_encoder"
|
||||
fraps_decoder_select="huffman"
|
||||
h261_encoder_select="aandct"
|
||||
h261_encoder_select="aandcttables"
|
||||
h263_decoder_select="h263_parser"
|
||||
h263_encoder_select="aandct"
|
||||
h263_encoder_select="aandcttables"
|
||||
h263_vaapi_hwaccel_select="vaapi h263_decoder"
|
||||
h263i_decoder_select="h263_decoder"
|
||||
h263p_encoder_select="h263_encoder"
|
||||
@ -1523,9 +1526,9 @@ iac_decoder_select="fft mdct sinewin"
|
||||
imc_decoder_select="fft mdct sinewin"
|
||||
jpegls_decoder_select="golomb"
|
||||
jpegls_encoder_select="golomb"
|
||||
ljpeg_encoder_select="aandct"
|
||||
ljpeg_encoder_select="aandcttables"
|
||||
loco_decoder_select="golomb"
|
||||
mjpeg_encoder_select="aandct"
|
||||
mjpeg_encoder_select="aandcttables"
|
||||
mlp_decoder_select="mlp_parser"
|
||||
mp1_decoder_select="mpegaudiodsp"
|
||||
mp1float_decoder_select="mpegaudiodsp"
|
||||
@ -1544,13 +1547,13 @@ mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h"
|
||||
mpeg_xvmc_decoder_select="mpegvideo_decoder"
|
||||
mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder"
|
||||
mpeg1_vdpau_hwaccel_select="vdpau mpeg1video_decoder"
|
||||
mpeg1video_encoder_select="aandct"
|
||||
mpeg1video_encoder_select="aandcttables"
|
||||
mpeg2_crystalhd_decoder_select="crystalhd"
|
||||
mpeg2_dxva2_hwaccel_deps="dxva2api_h"
|
||||
mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder"
|
||||
mpeg2_vdpau_hwaccel_select="vdpau mpeg2video_decoder"
|
||||
mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder"
|
||||
mpeg2video_encoder_select="aandct"
|
||||
mpeg2video_encoder_select="aandcttables"
|
||||
mpeg4_crystalhd_decoder_select="crystalhd"
|
||||
mpeg4_decoder_select="h263_decoder mpeg4video_parser"
|
||||
mpeg4_encoder_select="h263_encoder"
|
||||
@ -1580,11 +1583,11 @@ rv40_decoder_select="golomb h264chroma h264pred h264qpel"
|
||||
shorten_decoder_select="golomb"
|
||||
sipr_decoder_select="lsp"
|
||||
snow_decoder_select="dwt"
|
||||
snow_encoder_select="aandct dwt"
|
||||
snow_encoder_select="aandcttables dwt"
|
||||
sonic_decoder_select="golomb"
|
||||
sonic_encoder_select="golomb"
|
||||
sonic_ls_encoder_select="golomb"
|
||||
svq1_encoder_select="aandct"
|
||||
svq1_encoder_select="aandcttables"
|
||||
svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
|
||||
svq3_decoder_suggest="zlib"
|
||||
theora_decoder_select="vp3_decoder"
|
||||
@ -1965,6 +1968,8 @@ ldflags_filter=echo
|
||||
AS_O='-o $@'
|
||||
CC_O='-o $@'
|
||||
CXX_O='-o $@'
|
||||
LD_O='-o $@'
|
||||
HOSTCC_O='-o $@'
|
||||
|
||||
host_cflags='-D_ISOC99_SOURCE -D_XOPEN_SOURCE=600 -O3 -g'
|
||||
host_libs='-lm'
|
||||
@ -1975,8 +1980,8 @@ target_path='$(CURDIR)'
|
||||
|
||||
# since the object filename is not given with the -MM flag, the compiler
|
||||
# is only able to print the basename, and we must add the path ourselves
|
||||
DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)'
|
||||
DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -MM'
|
||||
DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)'
|
||||
DEPFLAGS='-MM'
|
||||
|
||||
# find source path
|
||||
if test -f configure; then
|
||||
@ -2319,120 +2324,150 @@ tms470_flags(){
|
||||
done
|
||||
}
|
||||
|
||||
if $cc -v 2>&1 | grep -q '^gcc.*LLVM'; then
|
||||
cc_type=llvm_gcc
|
||||
gcc_extra_ver=$(expr "$($cc --version | head -n1)" : '.*\((.*)\)')
|
||||
cc_ident="llvm-gcc $($cc -dumpversion) $gcc_extra_ver"
|
||||
CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
cflags_speed='-O3'
|
||||
cflags_size='-Os'
|
||||
elif $cc -v 2>&1 | grep -qi ^gcc; then
|
||||
cc_type=gcc
|
||||
gcc_version=$($cc --version | head -n1)
|
||||
gcc_basever=$($cc -dumpversion)
|
||||
gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)')
|
||||
gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)")
|
||||
cc_ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver")
|
||||
if ! $cc -dumpversion | grep -q '^2\.'; then
|
||||
CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
probe_cc(){
|
||||
pfx=$1
|
||||
_cc=$2
|
||||
|
||||
unset _type _ident _cc_o _flags _cflags _ldflags _depflags _DEPCMD _DEPFLAGS
|
||||
_flags_filter=echo
|
||||
|
||||
if $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then
|
||||
_type=llvm_gcc
|
||||
gcc_extra_ver=$(expr "$($_cc --version | head -n1)" : '.*\((.*)\)')
|
||||
_ident="llvm-gcc $($_cc -dumpversion) $gcc_extra_ver"
|
||||
_depflags='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
_cflags_speed='-O3'
|
||||
_cflags_size='-Os'
|
||||
elif $_cc -v 2>&1 | grep -qi ^gcc; then
|
||||
_type=gcc
|
||||
gcc_version=$($_cc --version | head -n1)
|
||||
gcc_basever=$($_cc -dumpversion)
|
||||
gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)')
|
||||
gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)")
|
||||
_ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver")
|
||||
if ! $_cc -dumpversion | grep -q '^2\.'; then
|
||||
_depflags='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
fi
|
||||
_cflags_speed='-O3'
|
||||
_cflags_size='-Os'
|
||||
elif $_cc --version 2>/dev/null | grep -q Intel; then
|
||||
_type=icc
|
||||
_ident=$($_cc --version | head -n1)
|
||||
_depflags='-MMD'
|
||||
_cflags_speed='-O3'
|
||||
_cflags_size='-Os'
|
||||
_cflags_noopt='-O1'
|
||||
elif $_cc -v 2>&1 | grep -q xlc; then
|
||||
_type=xlc
|
||||
_ident=$($_cc -qversion 2>/dev/null | head -n1)
|
||||
_cflags_speed='-O5'
|
||||
_cflags_size='-O5 -qcompact'
|
||||
elif $_cc -V 2>/dev/null | grep -q Compaq; then
|
||||
_type=ccc
|
||||
_ident=$($_cc -V | head -n1 | cut -d' ' -f1-3)
|
||||
_DEPFLAGS='-M'
|
||||
debuglevel=3
|
||||
_ldflags='-Wl,-z,now' # calls to libots crash without this
|
||||
_cflags_speed='-fast'
|
||||
_cflags_size='-O1'
|
||||
elif $_cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then
|
||||
test -d "$sysroot" || die "No valid sysroot specified."
|
||||
_type=armcc
|
||||
_ident=$($_cc --vsn | head -n1)
|
||||
armcc_conf="$PWD/armcc.conf"
|
||||
$_cc --arm_linux_configure \
|
||||
--arm_linux_config_file="$armcc_conf" \
|
||||
--configure_sysroot="$sysroot" \
|
||||
--configure_cpp_headers="$sysinclude" >>$logfile 2>&1 ||
|
||||
die "Error creating armcc configuration file."
|
||||
$_cc --vsn | grep -q RVCT && armcc_opt=rvct || armcc_opt=armcc
|
||||
_flags="--arm_linux_config_file=$armcc_conf --translate_gcc"
|
||||
as_default="${cross_prefix}gcc"
|
||||
_depflags='-MMD'
|
||||
_cflags_speed='-O3'
|
||||
_cflags_size='-Os'
|
||||
elif $_cc -version 2>/dev/null | grep -q TMS470; then
|
||||
_type=tms470
|
||||
_ident=$($_cc -version | head -n1 | tr -s ' ')
|
||||
_flags='--gcc --abi=eabi -me'
|
||||
_cflags='-D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__='
|
||||
_cc_o='-fe=$@'
|
||||
as_default="${cross_prefix}gcc"
|
||||
ld_default="${cross_prefix}gcc"
|
||||
_depflags='-ppa -ppd=$(@:.o=.d)'
|
||||
_cflags_speed='-O3 -mf=5'
|
||||
_cflags_size='-O3 -mf=2'
|
||||
_flags_filter=tms470_flags
|
||||
elif $_cc -v 2>&1 | grep -q clang; then
|
||||
_type=clang
|
||||
_ident=$($_cc --version | head -n1)
|
||||
_depflags='-MMD'
|
||||
_cflags_speed='-O3'
|
||||
_cflags_size='-Os'
|
||||
elif $_cc -V 2>&1 | grep -q Sun; then
|
||||
_type=suncc
|
||||
_ident=$($_cc -V 2>&1 | head -n1 | cut -d' ' -f 2-)
|
||||
_DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)'
|
||||
_DEPFLAGS='-xM1'
|
||||
_ldflags='-std=c99'
|
||||
_cflags_speed='-O5'
|
||||
_cflags_size='-O5 -xspace'
|
||||
_flags_filter=suncc_flags
|
||||
elif $_cc -v 2>&1 | grep -q 'PathScale\|Path64'; then
|
||||
_type=pathscale
|
||||
_ident=$($_cc -v 2>&1 | head -n1 | tr -d :)
|
||||
_depflags='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
_cflags_speed='-O2'
|
||||
_cflags_size='-Os'
|
||||
_flags_filter='filter_out -Wdisabled-optimization'
|
||||
elif $_cc -v 2>&1 | grep -q Open64; then
|
||||
_type=open64
|
||||
_ident=$($_cc -v 2>&1 | head -n1 | tr -d :)
|
||||
_depflags='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
_cflags_speed='-O2'
|
||||
_cflags_size='-Os'
|
||||
_flags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros'
|
||||
elif $_cc -V 2>&1 | grep -q Portland; then
|
||||
_type=pgi
|
||||
_ident="PGI $($_cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')"
|
||||
opt_common='-alias=ansi -Mlre -Mpre'
|
||||
_cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common"
|
||||
_cflags_size="-O2 -Munroll=c:1 $opt_common"
|
||||
_cflags_noopt="-O1"
|
||||
_flags_filter=pgi_flags
|
||||
fi
|
||||
cflags_speed='-O3'
|
||||
cflags_size='-Os'
|
||||
elif $cc --version 2>/dev/null | grep -q Intel; then
|
||||
cc_type=icc
|
||||
cc_ident=$($cc --version | head -n1)
|
||||
CC_DEPFLAGS='-MMD'
|
||||
AS_DEPFLAGS='-MMD'
|
||||
cflags_speed='-O3'
|
||||
cflags_size='-Os'
|
||||
cflags_noopt='-O1'
|
||||
elif $cc -v 2>&1 | grep -q xlc; then
|
||||
cc_type=xlc
|
||||
cc_ident=$($cc -qversion 2>/dev/null | head -n1)
|
||||
cflags_speed='-O5'
|
||||
cflags_size='-O5 -qcompact'
|
||||
elif $cc -V 2>/dev/null | grep -q Compaq; then
|
||||
cc_type=ccc
|
||||
cc_ident=$($cc -V | head -n1 | cut -d' ' -f1-3)
|
||||
DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -M'
|
||||
debuglevel=3
|
||||
add_ldflags -Wl,-z,now # calls to libots crash without this
|
||||
cflags_speed='-fast'
|
||||
cflags_size='-O1'
|
||||
elif $cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then
|
||||
test -d "$sysroot" || die "No valid sysroot specified."
|
||||
cc_type=armcc
|
||||
cc_ident=$($cc --vsn | head -n1)
|
||||
armcc_conf="$PWD/armcc.conf"
|
||||
$cc --arm_linux_configure \
|
||||
--arm_linux_config_file="$armcc_conf" \
|
||||
--configure_sysroot="$sysroot" \
|
||||
--configure_cpp_headers="$sysinclude" >>$logfile 2>&1 ||
|
||||
die "Error creating armcc configuration file."
|
||||
$cc --vsn | grep -q RVCT && armcc_opt=rvct || armcc_opt=armcc
|
||||
cc="$cc --arm_linux_config_file=$armcc_conf --translate_gcc"
|
||||
as_default="${cross_prefix}gcc"
|
||||
CC_DEPFLAGS='-MMD'
|
||||
AS_DEPFLAGS='-MMD'
|
||||
cflags_speed='-O3'
|
||||
cflags_size='-Os'
|
||||
asflags_filter="filter_out -W${armcc_opt}*"
|
||||
elif $cc -version 2>/dev/null | grep -q TMS470; then
|
||||
cc_type=tms470
|
||||
cc_ident=$($cc -version | head -n1 | tr -s ' ')
|
||||
cc="$cc --gcc --abi=eabi -me"
|
||||
CC_O='-fe=$@'
|
||||
as_default="${cross_prefix}gcc"
|
||||
ld_default="${cross_prefix}gcc"
|
||||
add_cflags -D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__=
|
||||
CC_DEPFLAGS='-ppa -ppd=$(@:.o=.d)'
|
||||
AS_DEPFLAGS='-MMD'
|
||||
cflags_speed='-O3 -mf=5'
|
||||
cflags_size='-O3 -mf=2'
|
||||
cflags_filter=tms470_flags
|
||||
elif $cc -v 2>&1 | grep -q clang; then
|
||||
cc_type=clang
|
||||
cc_ident=$($cc --version | head -n1)
|
||||
CC_DEPFLAGS='-MMD'
|
||||
AS_DEPFLAGS='-MMD'
|
||||
cflags_speed='-O3'
|
||||
cflags_size='-Os'
|
||||
elif $cc -V 2>&1 | grep -q Sun; then
|
||||
cc_type=suncc
|
||||
cc_ident=$($cc -V 2>&1 | head -n1 | cut -d' ' -f 2-)
|
||||
DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)'
|
||||
DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -xM1'
|
||||
add_ldflags -xc99
|
||||
cflags_speed='-O5'
|
||||
cflags_size='-O5 -xspace'
|
||||
cflags_filter=suncc_flags
|
||||
elif $cc -v 2>&1 | grep -q 'PathScale\|Path64'; then
|
||||
cc_type=pathscale
|
||||
cc_ident=$($cc -v 2>&1 | head -n1 | tr -d :)
|
||||
CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
cflags_speed='-O2'
|
||||
cflags_size='-Os'
|
||||
cflags_filter='filter_out -Wdisabled-optimization'
|
||||
elif $cc -v 2>&1 | grep -q Open64; then
|
||||
cc_type=open64
|
||||
cc_ident=$($cc -v 2>&1 | head -n1 | tr -d :)
|
||||
CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@'
|
||||
cflags_speed='-O2'
|
||||
cflags_size='-Os'
|
||||
cflags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros'
|
||||
elif $cc -V 2>&1 | grep -q Portland; then
|
||||
cc_type=pgi
|
||||
cc_ident="PGI $($cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')"
|
||||
opt_common='-alias=ansi -Mlre -Mpre'
|
||||
cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common"
|
||||
cflags_size="-O2 -Munroll=c:1 $opt_common"
|
||||
cflags_noopt="-O1"
|
||||
cflags_filter=pgi_flags
|
||||
fi
|
||||
|
||||
eval ${pfx}_type=\$_type
|
||||
eval ${pfx}_ident=\$_ident
|
||||
}
|
||||
|
||||
set_ccvars(){
|
||||
eval ${1}_O=\${_cc_o-\${${1}_O}}
|
||||
|
||||
if [ -n "$_depflags" ]; then
|
||||
eval ${1}_DEPFLAGS=\$_depflags
|
||||
else
|
||||
eval ${1}DEP=\${_DEPCMD:-\$DEPCMD}
|
||||
eval ${1}DEP_FLAGS=\${_DEPFLAGS:-\$DEPFLAGS}
|
||||
eval DEP${1}FLAGS=\$_flags
|
||||
fi
|
||||
}
|
||||
|
||||
probe_cc cc "$cc"
|
||||
cflags_filter=$_flags_filter
|
||||
cflags_speed=$_cflags_speed
|
||||
cflags_size=$_cflags_size
|
||||
cflags_noopt=$_cflags_noopt
|
||||
add_cflags $_flags $_cflags
|
||||
cc_ldflags=$_ldflags
|
||||
set_ccvars CC
|
||||
|
||||
probe_cc hostcc "$host_cc"
|
||||
host_cflags_filter=$_flags_filter
|
||||
host_ldflags_filter=$_flags_filter
|
||||
add_host_cflags $_flags $_cflags
|
||||
add_host_ldflags $_flags $_ldflags
|
||||
set_ccvars HOSTCC
|
||||
|
||||
test -n "$cc_type" && enable $cc_type ||
|
||||
warn "Unknown C compiler $cc, unable to select optimal CFLAGS"
|
||||
@ -2442,9 +2477,23 @@ test -n "$cc_type" && enable $cc_type ||
|
||||
: ${ld_default:=$cc}
|
||||
set_default ar as dep_cc ld
|
||||
|
||||
test -n "$CC_DEPFLAGS" || CCDEP=$DEPEND_CMD
|
||||
test -n "$CXX_DEPFLAGS" || CXXDEP=$DEPEND_CMD
|
||||
test -n "$AS_DEPFLAGS" || ASDEP=$DEPEND_CMD
|
||||
probe_cc as "$as"
|
||||
asflags_filter=$_flags_filter
|
||||
add_asflags $_flags $_cflags
|
||||
set_ccvars AS
|
||||
|
||||
probe_cc ld "$ld"
|
||||
ldflags_filter=$_flags_filter
|
||||
add_ldflags $_flags $_ldflags
|
||||
test "$cc_type" != "$ld_type" && add_ldflags $cc_ldflags
|
||||
LD_O=${_cc_o-$LD_O}
|
||||
|
||||
if [ -z "$CC_DEPFLAGS" ] && [ "$dep_cc" != "$cc" ]; then
|
||||
probe_cc depcc "$dep_cc"
|
||||
CCDEP=${_DEPCMD:-$DEPCMD}
|
||||
CCDEP_FLAGS=${_DEPFLAGS:=$DEPFLAGS}
|
||||
DEPCCFLAGS=$_flags
|
||||
fi
|
||||
|
||||
add_cflags $extra_cflags
|
||||
add_cxxflags $extra_cxxflags
|
||||
@ -3140,6 +3189,7 @@ EOF
|
||||
check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
|
||||
die "yasm not found, use --disable-yasm for a crippled build"
|
||||
check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
|
||||
check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
|
||||
fi
|
||||
|
||||
case "$cpu" in
|
||||
@ -3673,6 +3723,7 @@ if enabled x86; then
|
||||
echo "SSE enabled ${sse-no}"
|
||||
echo "SSSE3 enabled ${ssse3-no}"
|
||||
echo "AVX enabled ${avx-no}"
|
||||
echo "FMA4 enabled ${fma4-no}"
|
||||
echo "CMOV enabled ${cmov-no}"
|
||||
echo "CMOV is fast ${fast_cmov-no}"
|
||||
echo "EBX available ${ebx_available-no}"
|
||||
@ -3814,6 +3865,9 @@ CXX=$cxx
|
||||
AS=$as
|
||||
LD=$ld
|
||||
DEPCC=$dep_cc
|
||||
DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS)
|
||||
DEPAS=$as
|
||||
DEPASFLAGS=$DEPASFLAGS \$(CPPFLAGS)
|
||||
YASM=$yasmexe
|
||||
YASMDEP=$yasmexe
|
||||
AR=$ar
|
||||
@ -3825,9 +3879,10 @@ CPPFLAGS=$CPPFLAGS
|
||||
CFLAGS=$CFLAGS
|
||||
CXXFLAGS=$CXXFLAGS
|
||||
ASFLAGS=$ASFLAGS
|
||||
AS_O=$CC_O
|
||||
AS_O=$AS_O
|
||||
CC_O=$CC_O
|
||||
CXX_O=$CXX_O
|
||||
LD_O=$LD_O
|
||||
LDFLAGS=$LDFLAGS
|
||||
FFSERVERLDFLAGS=$FFSERVERLDFLAGS
|
||||
SHFLAGS=$SHFLAGS
|
||||
@ -3842,10 +3897,11 @@ SLIBPREF=$SLIBPREF
|
||||
SLIBSUF=$SLIBSUF
|
||||
EXESUF=$EXESUF
|
||||
EXTRA_VERSION=$extra_version
|
||||
DEPFLAGS=$DEPFLAGS
|
||||
CCDEP=$CCDEP
|
||||
CXXDEP=$CXXDEP
|
||||
CCDEP_FLAGS=$CCDEP_FLAGS
|
||||
ASDEP=$ASDEP
|
||||
ASDEP_FLAGS=$ASDEP_FLAGS
|
||||
CC_DEPFLAGS=$CC_DEPFLAGS
|
||||
AS_DEPFLAGS=$AS_DEPFLAGS
|
||||
HOSTCC=$host_cc
|
||||
@ -3853,6 +3909,12 @@ HOSTCFLAGS=$host_cflags
|
||||
HOSTEXESUF=$HOSTEXESUF
|
||||
HOSTLDFLAGS=$host_ldflags
|
||||
HOSTLIBS=$host_libs
|
||||
DEPHOSTCC=$host_cc
|
||||
DEPHOSTCCFLAGS=$DEPHOSTCCFLAGS \$(HOSTCCFLAGS)
|
||||
HOSTCCDEP=$HOSTCCDEP
|
||||
HOSTCCDEP_FLAGS=$HOSTCCDEP_FLAGS
|
||||
HOSTCC_DEPFLAGS=$HOSTCC_DEPFLAGS
|
||||
HOSTCC_O=$HOSTCC_O
|
||||
TARGET_EXEC=$target_exec
|
||||
TARGET_PATH=$target_path
|
||||
SDL_LIBS=$sdl_libs
|
||||
|
@ -28,8 +28,6 @@ doc/%.txt: doc/%.texi
|
||||
$(Q)$(TEXIDEP)
|
||||
$(M)makeinfo --force --no-headers -o $@ $< 2>/dev/null
|
||||
|
||||
doc/print_options.o: libavformat/options_table.h libavcodec/options_table.h
|
||||
|
||||
GENTEXI = format codec
|
||||
GENTEXI := $(GENTEXI:%=doc/avoptions_%.texi)
|
||||
|
||||
|
@ -32,7 +32,7 @@ OBJS = allcodecs.o \
|
||||
utils.o \
|
||||
|
||||
# parts needed for many different codecs
|
||||
OBJS-$(CONFIG_AANDCT) += aandcttab.o
|
||||
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o
|
||||
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o
|
||||
OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o
|
||||
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
|
||||
|
@ -200,6 +200,7 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
|
||||
int lpc_order, int lpc_quant)
|
||||
{
|
||||
int i;
|
||||
int32_t *pred = buffer_out;
|
||||
|
||||
/* first sample always copies */
|
||||
*buffer_out = *error_buffer;
|
||||
@ -223,37 +224,35 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
|
||||
}
|
||||
|
||||
/* read warm-up samples */
|
||||
for (i = 0; i < lpc_order; i++) {
|
||||
buffer_out[i + 1] = sign_extend(buffer_out[i] + error_buffer[i + 1],
|
||||
bps);
|
||||
}
|
||||
for (i = 1; i <= lpc_order; i++)
|
||||
buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps);
|
||||
|
||||
/* NOTE: 4 and 8 are very common cases that could be optimized. */
|
||||
|
||||
for (i = lpc_order; i < nb_samples - 1; i++) {
|
||||
for (; i < nb_samples; i++) {
|
||||
int j;
|
||||
int val = 0;
|
||||
int error_val = error_buffer[i + 1];
|
||||
int error_val = error_buffer[i];
|
||||
int error_sign;
|
||||
int d = buffer_out[i - lpc_order];
|
||||
int d = *pred++;
|
||||
|
||||
/* LPC prediction */
|
||||
for (j = 0; j < lpc_order; j++)
|
||||
val += (buffer_out[i - j] - d) * lpc_coefs[j];
|
||||
val += (pred[j] - d) * lpc_coefs[j];
|
||||
val = (val + (1 << (lpc_quant - 1))) >> lpc_quant;
|
||||
val += d + error_val;
|
||||
buffer_out[i + 1] = sign_extend(val, bps);
|
||||
buffer_out[i] = sign_extend(val, bps);
|
||||
|
||||
/* adapt LPC coefficients */
|
||||
error_sign = sign_only(error_val);
|
||||
if (error_sign) {
|
||||
for (j = lpc_order - 1; j >= 0 && error_val * error_sign > 0; j--) {
|
||||
for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) {
|
||||
int sign;
|
||||
val = d - buffer_out[i - j];
|
||||
val = d - pred[j];
|
||||
sign = sign_only(val) * error_sign;
|
||||
lpc_coefs[j] -= sign;
|
||||
val *= sign;
|
||||
error_val -= (val >> lpc_quant) * (lpc_order - j);
|
||||
error_val -= (val >> lpc_quant) * (j + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -356,7 +355,7 @@ static int decode_element(AVCodecContext *avctx, void *data, int ch_index,
|
||||
lpc_order[ch] = get_bits(&alac->gb, 5);
|
||||
|
||||
/* read the predictor table */
|
||||
for (i = 0; i < lpc_order[ch]; i++)
|
||||
for (i = lpc_order[ch] - 1; i >= 0; i--)
|
||||
lpc_coefs[ch][i] = get_sbits(&alac->gb, 16);
|
||||
}
|
||||
|
||||
@ -477,16 +476,19 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
|
||||
ALACContext *alac = avctx->priv_data;
|
||||
enum RawDataBlockType element;
|
||||
int channels;
|
||||
int ch, ret;
|
||||
int ch, ret, got_end;
|
||||
|
||||
init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8);
|
||||
|
||||
got_end = 0;
|
||||
alac->nb_samples = 0;
|
||||
ch = 0;
|
||||
while (get_bits_left(&alac->gb)) {
|
||||
while (get_bits_left(&alac->gb) >= 3) {
|
||||
element = get_bits(&alac->gb, 3);
|
||||
if (element == TYPE_END)
|
||||
if (element == TYPE_END) {
|
||||
got_end = 1;
|
||||
break;
|
||||
}
|
||||
if (element > TYPE_CPE && element != TYPE_LFE) {
|
||||
av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d", element);
|
||||
return AVERROR_PATCHWELCOME;
|
||||
@ -501,11 +503,15 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
|
||||
ret = decode_element(avctx, data,
|
||||
alac_channel_layout_offsets[alac->channels - 1][ch],
|
||||
channels);
|
||||
if (ret < 0)
|
||||
if (ret < 0 && get_bits_left(&alac->gb))
|
||||
return ret;
|
||||
|
||||
ch += channels;
|
||||
}
|
||||
if (!got_end) {
|
||||
av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n",
|
||||
|
@ -298,8 +298,8 @@ static int tscc2_decode_frame(AVCodecContext *avctx, void *data,
|
||||
if (!size) {
|
||||
int skip_row = 1, j, off = i * c->mb_width;
|
||||
for (j = 0; j < c->mb_width; j++) {
|
||||
if (c->slice_quants[off + i] == 1 ||
|
||||
c->slice_quants[off + i] == 2) {
|
||||
if (c->slice_quants[off + j] == 1 ||
|
||||
c->slice_quants[off + j] == 2) {
|
||||
skip_row = 0;
|
||||
break;
|
||||
}
|
||||
|
@ -1158,12 +1158,7 @@ ALIGN 16
|
||||
add src1q, 2*mmsize
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
@ -1193,12 +1188,7 @@ ALIGN 16
|
||||
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
|
||||
%endif
|
||||
add lenq, mmsize
|
||||
jl .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%endif
|
||||
.end:
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
@ -750,9 +750,6 @@ section .text
|
||||
; The others pass args in registers and don't spill anything.
|
||||
cglobal fft_dispatch%2, 2,5,8, z, nbits
|
||||
FFT_DISPATCH fullsuffix, nbits
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
%endif
|
||||
RET
|
||||
%endmacro ; DECL_FFT
|
||||
|
||||
@ -957,9 +954,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
|
||||
%1 r0, r1, r6, rtcos, rtsin
|
||||
%if ARCH_X86_64 == 0
|
||||
add esp, 12
|
||||
%endif
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
%endif
|
||||
RET
|
||||
%endmacro
|
||||
|
@ -36,6 +36,8 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
|
||||
|
||||
DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#define MACS(rt, ra, rb) rt+=(ra)*(rb)
|
||||
#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
|
||||
|
||||
@ -178,6 +180,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
|
||||
*out = sum;
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#define DECL_IMDCT_BLOCKS(CPU1, CPU2) \
|
||||
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
|
||||
@ -241,9 +244,11 @@ void ff_mpadsp_init_mmx(MPADSPContext *s)
|
||||
}
|
||||
}
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
if (mm_flags & AV_CPU_FLAG_SSE2) {
|
||||
s->apply_window_float = apply_window_mp3;
|
||||
}
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
#if HAVE_YASM
|
||||
if (0) {
|
||||
#if HAVE_AVX
|
||||
|
@ -83,8 +83,7 @@ section .text align=16
|
||||
|
||||
; %1 = row or col (for rounding variable)
|
||||
; %2 = number of bits to shift at the end
|
||||
; %3 = optimization
|
||||
%macro IDCT_1D 3
|
||||
%macro IDCT_1D 2
|
||||
; a0 = (W4 * row[0]) + (1 << (15 - 1));
|
||||
; a1 = a0;
|
||||
; a2 = a0;
|
||||
@ -235,8 +234,8 @@ section .text align=16
|
||||
|
||||
; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
|
||||
; DCTELEM *block, const int16_t *qmat);
|
||||
%macro idct_put_fn 2
|
||||
cglobal prores_idct_put_10_%1, 4, 4, %2
|
||||
%macro idct_put_fn 1
|
||||
cglobal prores_idct_put_10, 4, 4, %1
|
||||
movsxd r1, r1d
|
||||
pxor m15, m15 ; zero
|
||||
|
||||
@ -252,7 +251,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
|
||||
pmullw m13,[r3+64]
|
||||
pmullw m12,[r3+96]
|
||||
|
||||
IDCT_1D row, 15, %1
|
||||
IDCT_1D row, 15
|
||||
|
||||
; transpose for second part of IDCT
|
||||
TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
|
||||
@ -267,7 +266,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
|
||||
|
||||
; for (i = 0; i < 8; i++)
|
||||
; idctSparseColAdd(dest + i, line_size, block + i);
|
||||
IDCT_1D col, 18, %1
|
||||
IDCT_1D col, 18
|
||||
|
||||
; clip/store
|
||||
mova m3, [pw_4]
|
||||
@ -302,13 +301,27 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM
|
||||
idct_put_fn sse2, 16
|
||||
INIT_XMM
|
||||
idct_put_fn sse4, 16
|
||||
%macro SIGNEXTEND 2-3 ; dstlow, dsthigh, tmp
|
||||
%if cpuflag(sse4)
|
||||
movhlps %2, %1
|
||||
pmovsxwd %1, %1
|
||||
pmovsxwd %2, %2
|
||||
%else ; sse2
|
||||
pxor %3, %3
|
||||
pcmpgtw %3, %1
|
||||
mova %2, %1
|
||||
punpcklwd %1, %3
|
||||
punpckhwd %2, %3
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
idct_put_fn 16
|
||||
INIT_XMM sse4
|
||||
idct_put_fn 16
|
||||
%if HAVE_AVX
|
||||
INIT_AVX
|
||||
idct_put_fn avx, 16
|
||||
INIT_XMM avx
|
||||
idct_put_fn 16
|
||||
%endif
|
||||
|
||||
%endif
|
||||
|
@ -578,11 +578,44 @@ static void swap_samplerates(AVFilterGraph *graph)
|
||||
swap_samplerates_on_filter(graph->filters[i]);
|
||||
}
|
||||
|
||||
#define CH_CENTER_PAIR (AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER)
|
||||
#define CH_FRONT_PAIR (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT)
|
||||
#define CH_STEREO_PAIR (AV_CH_STEREO_LEFT | AV_CH_STEREO_RIGHT)
|
||||
#define CH_WIDE_PAIR (AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT)
|
||||
#define CH_SIDE_PAIR (AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT)
|
||||
#define CH_DIRECT_PAIR (AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT)
|
||||
#define CH_BACK_PAIR (AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT)
|
||||
|
||||
/* allowable substitutions for channel pairs when comparing layouts,
|
||||
* ordered by priority for both values */
|
||||
static const uint64_t ch_subst[][2] = {
|
||||
{ CH_FRONT_PAIR, CH_CENTER_PAIR },
|
||||
{ CH_FRONT_PAIR, CH_WIDE_PAIR },
|
||||
{ CH_FRONT_PAIR, AV_CH_FRONT_CENTER },
|
||||
{ CH_CENTER_PAIR, CH_FRONT_PAIR },
|
||||
{ CH_CENTER_PAIR, CH_WIDE_PAIR },
|
||||
{ CH_CENTER_PAIR, AV_CH_FRONT_CENTER },
|
||||
{ CH_WIDE_PAIR, CH_FRONT_PAIR },
|
||||
{ CH_WIDE_PAIR, CH_CENTER_PAIR },
|
||||
{ CH_WIDE_PAIR, AV_CH_FRONT_CENTER },
|
||||
{ AV_CH_FRONT_CENTER, CH_FRONT_PAIR },
|
||||
{ AV_CH_FRONT_CENTER, CH_CENTER_PAIR },
|
||||
{ AV_CH_FRONT_CENTER, CH_WIDE_PAIR },
|
||||
{ CH_SIDE_PAIR, CH_DIRECT_PAIR },
|
||||
{ CH_SIDE_PAIR, CH_BACK_PAIR },
|
||||
{ CH_SIDE_PAIR, AV_CH_BACK_CENTER },
|
||||
{ CH_BACK_PAIR, CH_DIRECT_PAIR },
|
||||
{ CH_BACK_PAIR, CH_SIDE_PAIR },
|
||||
{ CH_BACK_PAIR, AV_CH_BACK_CENTER },
|
||||
{ AV_CH_BACK_CENTER, CH_BACK_PAIR },
|
||||
{ AV_CH_BACK_CENTER, CH_DIRECT_PAIR },
|
||||
{ AV_CH_BACK_CENTER, CH_SIDE_PAIR },
|
||||
};
|
||||
|
||||
static void swap_channel_layouts_on_filter(AVFilterContext *filter)
|
||||
{
|
||||
AVFilterLink *link = NULL;
|
||||
uint64_t chlayout;
|
||||
int i, j;
|
||||
int i, j, k;
|
||||
|
||||
for (i = 0; i < filter->nb_inputs; i++) {
|
||||
link = filter->inputs[i];
|
||||
@ -594,27 +627,55 @@ static void swap_channel_layouts_on_filter(AVFilterContext *filter)
|
||||
if (i == filter->nb_inputs)
|
||||
return;
|
||||
|
||||
chlayout = link->out_channel_layouts->channel_layouts[0];
|
||||
|
||||
for (i = 0; i < filter->nb_outputs; i++) {
|
||||
AVFilterLink *outlink = filter->outputs[i];
|
||||
int best_idx, best_score = INT_MIN;
|
||||
int best_idx, best_score = INT_MIN, best_count_diff = INT_MAX;
|
||||
|
||||
if (outlink->type != AVMEDIA_TYPE_AUDIO ||
|
||||
outlink->in_channel_layouts->nb_channel_layouts < 2)
|
||||
continue;
|
||||
|
||||
for (j = 0; j < outlink->in_channel_layouts->nb_channel_layouts; j++) {
|
||||
uint64_t in_chlayout = link->out_channel_layouts->channel_layouts[0];
|
||||
uint64_t out_chlayout = outlink->in_channel_layouts->channel_layouts[j];
|
||||
int matched_channels = av_get_channel_layout_nb_channels(chlayout &
|
||||
out_chlayout);
|
||||
int extra_channels = av_get_channel_layout_nb_channels(out_chlayout &
|
||||
(~chlayout));
|
||||
int score = matched_channels - extra_channels;
|
||||
int in_channels = av_get_channel_layout_nb_channels(in_chlayout);
|
||||
int out_channels = av_get_channel_layout_nb_channels(out_chlayout);
|
||||
int count_diff = out_channels - in_channels;
|
||||
int matched_channels, extra_channels;
|
||||
int score = 0;
|
||||
|
||||
if (score > best_score) {
|
||||
/* channel substitution */
|
||||
for (k = 0; k < FF_ARRAY_ELEMS(ch_subst); k++) {
|
||||
uint64_t cmp0 = ch_subst[k][0];
|
||||
uint64_t cmp1 = ch_subst[k][1];
|
||||
if (( in_chlayout & cmp0) && (!(out_chlayout & cmp0)) &&
|
||||
(out_chlayout & cmp1) && (!( in_chlayout & cmp1))) {
|
||||
in_chlayout &= ~cmp0;
|
||||
out_chlayout &= ~cmp1;
|
||||
/* add score for channel match, minus a deduction for
|
||||
having to do the substitution */
|
||||
score += 10 * av_get_channel_layout_nb_channels(cmp1) - 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* no penalty for LFE channel mismatch */
|
||||
if ( (in_chlayout & AV_CH_LOW_FREQUENCY) &&
|
||||
(out_chlayout & AV_CH_LOW_FREQUENCY))
|
||||
score += 10;
|
||||
in_chlayout &= ~AV_CH_LOW_FREQUENCY;
|
||||
out_chlayout &= ~AV_CH_LOW_FREQUENCY;
|
||||
|
||||
matched_channels = av_get_channel_layout_nb_channels(in_chlayout &
|
||||
out_chlayout);
|
||||
extra_channels = av_get_channel_layout_nb_channels(out_chlayout &
|
||||
(~in_chlayout));
|
||||
score += 10 * matched_channels - 5 * extra_channels;
|
||||
|
||||
if (score > best_score ||
|
||||
(count_diff < best_count_diff && score == best_score)) {
|
||||
best_score = score;
|
||||
best_idx = j;
|
||||
best_count_diff = count_diff;
|
||||
}
|
||||
}
|
||||
FFSWAP(uint64_t, outlink->in_channel_layouts->channel_layouts[0],
|
||||
|
@ -515,6 +515,12 @@ static int gen_pong(URLContext *s, RTMPContext *rt, RTMPPacket *ppkt)
|
||||
uint8_t *p;
|
||||
int ret;
|
||||
|
||||
if (ppkt->data_size < 6) {
|
||||
av_log(s, AV_LOG_ERROR, "Too short ping packet (%d)\n",
|
||||
ppkt->data_size);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
if ((ret = ff_rtmp_packet_create(&pkt, RTMP_NETWORK_CHANNEL, RTMP_PT_PING,
|
||||
ppkt->timestamp + 1, 6)) < 0)
|
||||
return ret;
|
||||
@ -885,9 +891,9 @@ static int handle_chunk_size(URLContext *s, RTMPPacket *pkt)
|
||||
RTMPContext *rt = s->priv_data;
|
||||
int ret;
|
||||
|
||||
if (pkt->data_size != 4) {
|
||||
if (pkt->data_size < 4) {
|
||||
av_log(s, AV_LOG_ERROR,
|
||||
"Chunk size change packet is not 4 bytes long (%d)\n",
|
||||
"Too short chunk size change packet (%d)\n",
|
||||
pkt->data_size);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
@ -913,6 +919,12 @@ static int handle_ping(URLContext *s, RTMPPacket *pkt)
|
||||
RTMPContext *rt = s->priv_data;
|
||||
int t, ret;
|
||||
|
||||
if (pkt->data_size < 2) {
|
||||
av_log(s, AV_LOG_ERROR, "Too short ping packet (%d)\n",
|
||||
pkt->data_size);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
t = AV_RB16(pkt->data);
|
||||
if (t == 6) {
|
||||
if ((ret = gen_pong(s, rt, pkt)) < 0)
|
||||
@ -950,6 +962,13 @@ static int handle_server_bw(URLContext *s, RTMPPacket *pkt)
|
||||
{
|
||||
RTMPContext *rt = s->priv_data;
|
||||
|
||||
if (pkt->data_size < 4) {
|
||||
av_log(s, AV_LOG_ERROR,
|
||||
"Too short server bandwidth report packet (%d)\n",
|
||||
pkt->data_size);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
rt->server_bw = AV_RB32(pkt->data);
|
||||
if (rt->server_bw <= 0) {
|
||||
av_log(s, AV_LOG_ERROR, "Incorrect server bandwidth %d\n",
|
||||
|
@ -246,9 +246,10 @@ static int handle_buffered_output(AVAudioResampleContext *avr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int avresample_convert(AVAudioResampleContext *avr, void **output,
|
||||
int out_plane_size, int out_samples, void **input,
|
||||
int in_plane_size, int in_samples)
|
||||
int attribute_align_arg avresample_convert(AVAudioResampleContext *avr,
|
||||
void **output, int out_plane_size,
|
||||
int out_samples, void **input,
|
||||
int in_plane_size, int in_samples)
|
||||
{
|
||||
AudioData input_buffer;
|
||||
AudioData output_buffer;
|
||||
|
@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
|
||||
mova [dstq+lenq+mmsize], m2
|
||||
add lenq, mmsize*2
|
||||
jl .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len
|
||||
mova [dstq+lenq+3*mmsize], m3
|
||||
add lenq, mmsize*4
|
||||
jl .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
|
@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
|
||||
add srcq, mmsize*2
|
||||
sub lend, mmsize*2/4
|
||||
jg .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
|
||||
add src0q, mmsize
|
||||
sub lend, mmsize/4
|
||||
jg .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
@ -236,3 +226,296 @@ MIX_1_TO_2_S16P_FLT
|
||||
INIT_XMM avx
|
||||
MIX_1_TO_2_S16P_FLT
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_mix_3_8_to_1_2_fltp/s16p_flt(float/int16_t **src, float **matrix,
|
||||
; int len, int out_ch, int in_ch);
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
%macro MIX_3_8_TO_1_2_FLT 3 ; %1 = in channels, %2 = out channels, %3 = s16p or fltp
|
||||
; define some names to make the code clearer
|
||||
%assign in_channels %1
|
||||
%assign out_channels %2
|
||||
%assign stereo out_channels - 1
|
||||
%ifidn %3, s16p
|
||||
%assign is_s16 1
|
||||
%else
|
||||
%assign is_s16 0
|
||||
%endif
|
||||
|
||||
; determine how many matrix elements must go on the stack vs. mmregs
|
||||
%assign matrix_elements in_channels * out_channels
|
||||
%if is_s16
|
||||
%if stereo
|
||||
%assign needed_mmregs 7
|
||||
%else
|
||||
%assign needed_mmregs 5
|
||||
%endif
|
||||
%else
|
||||
%if stereo
|
||||
%assign needed_mmregs 4
|
||||
%else
|
||||
%assign needed_mmregs 3
|
||||
%endif
|
||||
%endif
|
||||
%assign matrix_elements_mm num_mmregs - needed_mmregs
|
||||
%if matrix_elements < matrix_elements_mm
|
||||
%assign matrix_elements_mm matrix_elements
|
||||
%endif
|
||||
%if matrix_elements_mm < matrix_elements
|
||||
%assign matrix_elements_stack matrix_elements - matrix_elements_mm
|
||||
%else
|
||||
%assign matrix_elements_stack 0
|
||||
%endif
|
||||
|
||||
cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, src0, src1, len, src2, src3, src4, src5, src6, src7
|
||||
|
||||
; get aligned stack space if needed
|
||||
%if matrix_elements_stack > 0
|
||||
%if mmsize == 32
|
||||
%assign bkpreg %1 + 1
|
||||
%define bkpq r %+ bkpreg %+ q
|
||||
mov bkpq, rsp
|
||||
and rsp, ~(mmsize-1)
|
||||
sub rsp, matrix_elements_stack * mmsize
|
||||
%else
|
||||
%assign pad matrix_elements_stack * mmsize + (mmsize - gprsize) - (stack_offset & (mmsize - gprsize))
|
||||
SUB rsp, pad
|
||||
%endif
|
||||
%endif
|
||||
|
||||
; load matrix pointers
|
||||
%define matrix0q r1q
|
||||
%define matrix1q r3q
|
||||
%if stereo
|
||||
mov matrix1q, [matrix0q+gprsize]
|
||||
%endif
|
||||
mov matrix0q, [matrix0q]
|
||||
|
||||
; define matrix coeff names
|
||||
%assign %%i 0
|
||||
%assign %%j needed_mmregs
|
||||
%rep in_channels
|
||||
%if %%i >= matrix_elements_mm
|
||||
CAT_XDEFINE mx_stack_0_, %%i, 1
|
||||
CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize]
|
||||
%else
|
||||
CAT_XDEFINE mx_stack_0_, %%i, 0
|
||||
CAT_XDEFINE mx_0_, %%i, m %+ %%j
|
||||
%assign %%j %%j+1
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
%if stereo
|
||||
%assign %%i 0
|
||||
%rep in_channels
|
||||
%if in_channels + %%i >= matrix_elements_mm
|
||||
CAT_XDEFINE mx_stack_1_, %%i, 1
|
||||
CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize]
|
||||
%else
|
||||
CAT_XDEFINE mx_stack_1_, %%i, 0
|
||||
CAT_XDEFINE mx_1_, %%i, m %+ %%j
|
||||
%assign %%j %%j+1
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
%endif
|
||||
|
||||
; load/splat matrix coeffs
|
||||
%assign %%i 0
|
||||
%rep in_channels
|
||||
%if mx_stack_0_ %+ %%i
|
||||
VBROADCASTSS m0, [matrix0q+4*%%i]
|
||||
mova mx_0_ %+ %%i, m0
|
||||
%else
|
||||
VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i]
|
||||
%endif
|
||||
%if stereo
|
||||
%if mx_stack_1_ %+ %%i
|
||||
VBROADCASTSS m0, [matrix1q+4*%%i]
|
||||
mova mx_1_ %+ %%i, m0
|
||||
%else
|
||||
VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i]
|
||||
%endif
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
|
||||
; load channel pointers to registers as offsets from the first channel pointer
|
||||
%if ARCH_X86_64
|
||||
movsxd lenq, r2d
|
||||
%endif
|
||||
shl lenq, 2-is_s16
|
||||
%assign %%i 1
|
||||
%rep (in_channels - 1)
|
||||
%if ARCH_X86_32 && in_channels >= 7 && %%i >= 5
|
||||
mov src5q, [src0q+%%i*gprsize]
|
||||
add src5q, lenq
|
||||
mov src %+ %%i %+ m, src5q
|
||||
%else
|
||||
mov src %+ %%i %+ q, [src0q+%%i*gprsize]
|
||||
add src %+ %%i %+ q, lenq
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
mov src0q, [src0q]
|
||||
add src0q, lenq
|
||||
neg lenq
|
||||
.loop
|
||||
; for x86-32 with 7-8 channels we do not have enough gp registers for all src
|
||||
; pointers, so we have to load some of them from the stack each time
|
||||
%define copy_src_from_stack ARCH_X86_32 && in_channels >= 7 && %%i >= 5
|
||||
%if is_s16
|
||||
; mix with s16p input
|
||||
mova m0, [src0q+lenq]
|
||||
S16_TO_S32_SX 0, 1
|
||||
cvtdq2ps m0, m0
|
||||
cvtdq2ps m1, m1
|
||||
%if stereo
|
||||
mulps m2, m0, mx_1_0
|
||||
mulps m3, m1, mx_1_0
|
||||
%endif
|
||||
mulps m0, m0, mx_0_0
|
||||
mulps m1, m1, mx_0_0
|
||||
%assign %%i 1
|
||||
%rep (in_channels - 1)
|
||||
%if copy_src_from_stack
|
||||
%define src_ptr src5q
|
||||
%else
|
||||
%define src_ptr src %+ %%i %+ q
|
||||
%endif
|
||||
%if stereo
|
||||
%if copy_src_from_stack
|
||||
mov src_ptr, src %+ %%i %+ m
|
||||
%endif
|
||||
mova m4, [src_ptr+lenq]
|
||||
S16_TO_S32_SX 4, 5
|
||||
cvtdq2ps m4, m4
|
||||
cvtdq2ps m5, m5
|
||||
fmaddps m2, m4, mx_1_ %+ %%i, m2, m6
|
||||
fmaddps m3, m5, mx_1_ %+ %%i, m3, m6
|
||||
fmaddps m0, m4, mx_0_ %+ %%i, m0, m4
|
||||
fmaddps m1, m5, mx_0_ %+ %%i, m1, m5
|
||||
%else
|
||||
%if copy_src_from_stack
|
||||
mov src_ptr, src %+ %%i %+ m
|
||||
%endif
|
||||
mova m2, [src_ptr+lenq]
|
||||
S16_TO_S32_SX 2, 3
|
||||
cvtdq2ps m2, m2
|
||||
cvtdq2ps m3, m3
|
||||
fmaddps m0, m2, mx_0_ %+ %%i, m0, m4
|
||||
fmaddps m1, m3, mx_0_ %+ %%i, m1, m4
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
%if stereo
|
||||
cvtps2dq m2, m2
|
||||
cvtps2dq m3, m3
|
||||
packssdw m2, m3
|
||||
mova [src1q+lenq], m2
|
||||
%endif
|
||||
cvtps2dq m0, m0
|
||||
cvtps2dq m1, m1
|
||||
packssdw m0, m1
|
||||
mova [src0q+lenq], m0
|
||||
%else
|
||||
; mix with fltp input
|
||||
%if stereo || mx_stack_0_0
|
||||
mova m0, [src0q+lenq]
|
||||
%endif
|
||||
%if stereo
|
||||
mulps m1, m0, mx_1_0
|
||||
%endif
|
||||
%if stereo || mx_stack_0_0
|
||||
mulps m0, m0, mx_0_0
|
||||
%else
|
||||
mulps m0, [src0q+lenq], mx_0_0
|
||||
%endif
|
||||
%assign %%i 1
|
||||
%rep (in_channels - 1)
|
||||
%if copy_src_from_stack
|
||||
%define src_ptr src5q
|
||||
mov src_ptr, src %+ %%i %+ m
|
||||
%else
|
||||
%define src_ptr src %+ %%i %+ q
|
||||
%endif
|
||||
; avoid extra load for mono if matrix is in a mm register
|
||||
%if stereo || mx_stack_0_ %+ %%i
|
||||
mova m2, [src_ptr+lenq]
|
||||
%endif
|
||||
%if stereo
|
||||
fmaddps m1, m2, mx_1_ %+ %%i, m1, m3
|
||||
%endif
|
||||
%if stereo || mx_stack_0_ %+ %%i
|
||||
fmaddps m0, m2, mx_0_ %+ %%i, m0, m2
|
||||
%else
|
||||
fmaddps m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
mova [src0q+lenq], m0
|
||||
%if stereo
|
||||
mova [src1q+lenq], m1
|
||||
%endif
|
||||
%endif
|
||||
|
||||
add lenq, mmsize
|
||||
jl .loop
|
||||
; restore stack pointer
|
||||
%if matrix_elements_stack > 0
|
||||
%if mmsize == 32
|
||||
mov rsp, bkpq
|
||||
%else
|
||||
ADD rsp, pad
|
||||
%endif
|
||||
%endif
|
||||
; zero ymm high halves
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
%endif
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
%macro MIX_3_8_TO_1_2_FLT_FUNCS 0
|
||||
%assign %%i 3
|
||||
%rep 6
|
||||
INIT_XMM sse
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, fltp
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, fltp
|
||||
INIT_XMM sse2
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, s16p
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, s16p
|
||||
INIT_XMM sse4
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, s16p
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, s16p
|
||||
; do not use ymm AVX or FMA4 in x86-32 for 6 or more channels due to stack alignment issues
|
||||
%if HAVE_AVX
|
||||
%if ARCH_X86_64 || %%i < 6
|
||||
INIT_YMM avx
|
||||
%else
|
||||
INIT_XMM avx
|
||||
%endif
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, fltp
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, fltp
|
||||
INIT_XMM avx
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, s16p
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, s16p
|
||||
%endif
|
||||
%if HAVE_FMA4
|
||||
%if ARCH_X86_64 || %%i < 6
|
||||
INIT_YMM fma4
|
||||
%else
|
||||
INIT_XMM fma4
|
||||
%endif
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, fltp
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, fltp
|
||||
INIT_XMM fma4
|
||||
MIX_3_8_TO_1_2_FLT %%i, 1, s16p
|
||||
MIX_3_8_TO_1_2_FLT %%i, 2, s16p
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
MIX_3_8_TO_1_2_FLT_FUNCS
|
||||
|
@ -47,6 +47,129 @@ extern void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
|
||||
extern void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len,
|
||||
int out_ch, int in_ch);
|
||||
|
||||
#define DEFINE_MIX_3_8_TO_1_2(chan) \
|
||||
extern void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
\
|
||||
extern void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
\
|
||||
extern void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
\
|
||||
extern void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
\
|
||||
extern void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
\
|
||||
extern void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
\
|
||||
extern void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch); \
|
||||
extern void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src, \
|
||||
float **matrix, int len, \
|
||||
int out_ch, int in_ch);
|
||||
|
||||
DEFINE_MIX_3_8_TO_1_2(3)
|
||||
DEFINE_MIX_3_8_TO_1_2(4)
|
||||
DEFINE_MIX_3_8_TO_1_2(5)
|
||||
DEFINE_MIX_3_8_TO_1_2(6)
|
||||
DEFINE_MIX_3_8_TO_1_2(7)
|
||||
DEFINE_MIX_3_8_TO_1_2(8)
|
||||
|
||||
#define SET_MIX_3_8_TO_1_2(chan) \
|
||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, 16, 4, "SSE", \
|
||||
ff_mix_ ## chan ## _to_1_fltp_flt_sse); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, 16, 4, "SSE", \
|
||||
ff_mix_## chan ##_to_2_fltp_flt_sse); \
|
||||
} \
|
||||
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, 16, 8, "SSE2", \
|
||||
ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, 16, 8, "SSE2", \
|
||||
ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \
|
||||
} \
|
||||
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, 16, 8, "SSE4", \
|
||||
ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, 16, 8, "SSE4", \
|
||||
ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \
|
||||
} \
|
||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { \
|
||||
int ptr_align = 32; \
|
||||
int smp_align = 8; \
|
||||
if (ARCH_X86_32 || chan >= 6) { \
|
||||
ptr_align = 16; \
|
||||
smp_align = 4; \
|
||||
} \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, ptr_align, smp_align, "AVX", \
|
||||
ff_mix_ ## chan ## _to_1_fltp_flt_avx); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, ptr_align, smp_align, "AVX", \
|
||||
ff_mix_ ## chan ## _to_2_fltp_flt_avx); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, 16, 8, "AVX", \
|
||||
ff_mix_ ## chan ## _to_1_s16p_flt_avx); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, 16, 8, "AVX", \
|
||||
ff_mix_ ## chan ## _to_2_s16p_flt_avx); \
|
||||
} \
|
||||
if (mm_flags & AV_CPU_FLAG_FMA4 && HAVE_FMA4) { \
|
||||
int ptr_align = 32; \
|
||||
int smp_align = 8; \
|
||||
if (ARCH_X86_32 || chan >= 6) { \
|
||||
ptr_align = 16; \
|
||||
smp_align = 4; \
|
||||
} \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, ptr_align, smp_align, "FMA4", \
|
||||
ff_mix_ ## chan ## _to_1_fltp_flt_fma4); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, ptr_align, smp_align, "FMA4", \
|
||||
ff_mix_ ## chan ## _to_2_fltp_flt_fma4); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 1, 16, 8, "FMA4", \
|
||||
ff_mix_ ## chan ## _to_1_s16p_flt_fma4); \
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||
chan, 2, 16, 8, "FMA4", \
|
||||
ff_mix_ ## chan ## _to_2_s16p_flt_fma4); \
|
||||
}
|
||||
|
||||
av_cold void ff_audio_mix_init_x86(AudioMix *am)
|
||||
{
|
||||
#if HAVE_YASM
|
||||
@ -80,5 +203,12 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
|
||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
||||
1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
|
||||
}
|
||||
|
||||
SET_MIX_3_8_TO_1_2(3)
|
||||
SET_MIX_3_8_TO_1_2(4)
|
||||
SET_MIX_3_8_TO_1_2(5)
|
||||
SET_MIX_3_8_TO_1_2(6)
|
||||
SET_MIX_3_8_TO_1_2(7)
|
||||
SET_MIX_3_8_TO_1_2(8)
|
||||
#endif
|
||||
}
|
||||
|
@ -26,7 +26,8 @@
|
||||
pmovsxwd m%1, m%1
|
||||
SWAP %1, %2
|
||||
%else
|
||||
punpckhwd m%2, m%1
|
||||
mova m%2, m%1
|
||||
punpckhwd m%2, m%2
|
||||
punpcklwd m%1, m%1
|
||||
psrad m%2, 16
|
||||
psrad m%1, 16
|
||||
|
@ -797,11 +797,10 @@ int main(int argc, char **argv)
|
||||
av_expr_parse_and_eval(&d, *expr,
|
||||
const_names, const_values,
|
||||
NULL, NULL, NULL, NULL, NULL, 0, NULL);
|
||||
if(isnan(d)){
|
||||
if (isnan(d))
|
||||
printf("'%s' -> nan\n\n", *expr);
|
||||
}else{
|
||||
else
|
||||
printf("'%s' -> %f\n\n", *expr, d);
|
||||
}
|
||||
}
|
||||
|
||||
av_expr_parse_and_eval(&d, "1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)",
|
||||
|
@ -42,12 +42,7 @@ ALIGN 16
|
||||
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
@ -88,12 +83,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
|
||||
mova [dstq+lenq+mmsize], m2
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
|
@ -392,11 +392,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
|
||||
%macro RET 0
|
||||
WIN64_RESTORE_XMM_INTERNAL rsp
|
||||
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
%macro REP_RET 0
|
||||
%if regs_used > 7 || xmm_regs_used > 6
|
||||
%if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
|
||||
RET
|
||||
%else
|
||||
rep ret
|
||||
@ -433,11 +436,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
|
||||
|
||||
%macro RET 0
|
||||
POP_IF_USED 14, 13, 12, 11, 10, 9
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
%macro REP_RET 0
|
||||
%if regs_used > 9
|
||||
%if regs_used > 9 || mmsize == 32
|
||||
RET
|
||||
%else
|
||||
rep ret
|
||||
@ -479,11 +485,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
|
||||
%macro RET 0
|
||||
POP_IF_USED 6, 5, 4, 3
|
||||
%if mmsize == 32
|
||||
vzeroupper
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
%macro REP_RET 0
|
||||
%if regs_used > 3
|
||||
%if regs_used > 3 || mmsize == 32
|
||||
RET
|
||||
%else
|
||||
rep ret
|
||||
@ -1126,16 +1135,22 @@ AVX_INSTR pfmul, 1, 0, 1
|
||||
%undef j
|
||||
|
||||
%macro FMA_INSTR 3
|
||||
%macro %1 4-7 %1, %2, %3
|
||||
%if cpuflag(xop)
|
||||
v%5 %1, %2, %3, %4
|
||||
%macro %1 5-8 %1, %2, %3
|
||||
%if cpuflag(xop) || cpuflag(fma4)
|
||||
v%6 %1, %2, %3, %4
|
||||
%else
|
||||
%6 %1, %2, %3
|
||||
%7 %1, %4
|
||||
%ifidn %1, %4
|
||||
%7 %5, %2, %3
|
||||
%8 %1, %4, %5
|
||||
%else
|
||||
%7 %1, %2, %3
|
||||
%8 %1, %4
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
%endmacro
|
||||
|
||||
FMA_INSTR fmaddps, mulps, addps
|
||||
FMA_INSTR pmacsdd, pmulld, paddd
|
||||
FMA_INSTR pmacsww, pmullw, paddw
|
||||
FMA_INSTR pmadcswd, pmaddwd, paddd
|
||||
|
@ -15,9 +15,6 @@ ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
|
||||
|
||||
OBJDIRS += tests/data tests/vsynth1
|
||||
|
||||
# Required due to missing automatic dependency tracking for HOSTOBJS.
|
||||
tests/rotozoom.o tests/videogen.o: tests/utils.c
|
||||
|
||||
tests/vsynth1/00.pgm: tests/videogen$(HOSTEXESUF) | tests/vsynth1
|
||||
$(M)./$< 'tests/vsynth1/'
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user