x86: conditionally compile H.264 QPEL optimizations
This commit is contained in:
parent
3816642eab
commit
915a2a0a65
15
configure
vendored
15
configure
vendored
@@ -1165,6 +1165,7 @@ CONFIG_EXTRA="
|
||||
h264chroma
|
||||
h264dsp
|
||||
h264pred
|
||||
h264qpel
|
||||
huffman
|
||||
lgplv3
|
||||
lpc
|
||||
@@ -1311,7 +1312,7 @@ h263_encoder_select="aandct"
|
||||
h263_vaapi_hwaccel_select="vaapi h263_decoder"
|
||||
h263i_decoder_select="h263_decoder"
|
||||
h263p_encoder_select="h263_encoder"
|
||||
h264_decoder_select="golomb h264chroma h264dsp h264pred"
|
||||
h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
|
||||
h264_dxva2_hwaccel_deps="dxva2api_h"
|
||||
h264_dxva2_hwaccel_select="dxva2 h264_decoder"
|
||||
h264_vaapi_hwaccel_select="vaapi h264_decoder"
|
||||
@@ -1366,14 +1367,14 @@ rv10_decoder_select="h263_decoder"
|
||||
rv10_encoder_select="h263_encoder"
|
||||
rv20_decoder_select="h263_decoder"
|
||||
rv20_encoder_select="h263_encoder"
|
||||
rv30_decoder_select="golomb h264chroma h264pred"
|
||||
rv40_decoder_select="golomb h264chroma h264pred"
|
||||
rv30_decoder_select="golomb h264chroma h264pred h264qpel"
|
||||
rv40_decoder_select="golomb h264chroma h264pred h264qpel"
|
||||
shorten_decoder_select="golomb"
|
||||
sipr_decoder_select="lsp"
|
||||
snow_decoder_select="dwt"
|
||||
snow_encoder_select="aandct dwt"
|
||||
svq1_encoder_select="aandct"
|
||||
svq3_decoder_select="golomb h264chroma h264dsp h264pred"
|
||||
svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
|
||||
svq3_decoder_suggest="zlib"
|
||||
theora_decoder_select="vp3_decoder"
|
||||
tiff_decoder_suggest="zlib"
|
||||
@@ -1381,7 +1382,7 @@ tiff_encoder_suggest="zlib"
|
||||
truehd_decoder_select="mlp_decoder"
|
||||
tscc_decoder_select="zlib"
|
||||
twinvq_decoder_select="mdct lsp sinewin"
|
||||
vc1_decoder_select="h263_decoder h264chroma"
|
||||
vc1_decoder_select="h263_decoder h264chroma h264qpel"
|
||||
vc1_dxva2_hwaccel_deps="dxva2api_h"
|
||||
vc1_dxva2_hwaccel_select="dxva2 vc1_decoder"
|
||||
vc1_vaapi_hwaccel_select="vaapi vc1_decoder"
|
||||
@@ -1392,7 +1393,7 @@ vorbis_encoder_select="mdct"
|
||||
vp6_decoder_select="huffman"
|
||||
vp6a_decoder_select="vp6_decoder"
|
||||
vp6f_decoder_select="vp6_decoder"
|
||||
vp8_decoder_select="h264pred"
|
||||
vp8_decoder_select="h264pred h264qpel"
|
||||
wmapro_decoder_select="mdct sinewin"
|
||||
wmav1_decoder_select="mdct sinewin"
|
||||
wmav1_encoder_select="mdct sinewin"
|
||||
@@ -1419,7 +1420,7 @@ vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
|
||||
vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
|
||||
|
||||
# parsers
|
||||
h264_parser_select="golomb h264chroma h264dsp h264pred"
|
||||
h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel"
|
||||
|
||||
# external libraries
|
||||
libdirac_decoder_deps="libdirac !libschroedinger"
|
||||
|
@@ -23,6 +23,7 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
|
||||
YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \
|
||||
x86/h264_intrapred_10bit.o
|
||||
MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
|
||||
YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_10bit.o
|
||||
|
||||
MMX-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
|
||||
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
|
||||
@@ -62,7 +63,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
|
||||
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
|
||||
x86/deinterlace.o \
|
||||
x86/fmtconvert.o \
|
||||
x86/h264_qpel_10bit.o \
|
||||
$(YASM-OBJS-yes)
|
||||
|
||||
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o
|
||||
|
@@ -2479,6 +2479,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
|
||||
}
|
||||
|
||||
if (CONFIG_H264QPEL) {
|
||||
SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
|
||||
SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
|
||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
|
||||
@@ -2510,6 +2511,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
|
||||
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
|
||||
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
|
||||
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
|
||||
}
|
||||
|
||||
#if HAVE_YASM
|
||||
if (!high_bit_depth && CONFIG_H264CHROMA) {
|
||||
@@ -2577,6 +2579,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
|
||||
}
|
||||
|
||||
if (CONFIG_H264QPEL) {
|
||||
SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
|
||||
@@ -2597,6 +2600,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
|
||||
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
|
||||
}
|
||||
|
||||
#if HAVE_YASM
|
||||
if (!high_bit_depth && CONFIG_H264CHROMA) {
|
||||
@@ -2671,11 +2675,12 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
||||
c->put_pixels_tab[0][0] = put_pixels16_sse2;
|
||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
|
||||
if (CONFIG_H264QPEL)
|
||||
H264_QPEL_FUNCS(0, 0, sse2);
|
||||
}
|
||||
}
|
||||
|
||||
if (!high_bit_depth) {
|
||||
if (!high_bit_depth && CONFIG_H264QPEL) {
|
||||
H264_QPEL_FUNCS(0, 1, sse2);
|
||||
H264_QPEL_FUNCS(0, 2, sse2);
|
||||
H264_QPEL_FUNCS(0, 3, sse2);
|
||||
@@ -2692,6 +2697,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
||||
|
||||
#if HAVE_YASM
|
||||
if (bit_depth == 10) {
|
||||
if (CONFIG_H264QPEL) {
|
||||
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
|
||||
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
|
||||
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
|
||||
@@ -2699,7 +2705,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
||||
H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
|
||||
H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
|
||||
H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
|
||||
|
||||
}
|
||||
if (CONFIG_H264CHROMA) {
|
||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
|
||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
|
||||
@@ -2729,7 +2735,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
|
||||
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
||||
const int bit_depth = avctx->bits_per_raw_sample;
|
||||
|
||||
if (!high_bit_depth) {
|
||||
if (!high_bit_depth && CONFIG_H264QPEL) {
|
||||
H264_QPEL_FUNCS(1, 0, ssse3);
|
||||
H264_QPEL_FUNCS(1, 1, ssse3);
|
||||
H264_QPEL_FUNCS(1, 2, ssse3);
|
||||
@@ -2744,7 +2750,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
|
||||
H264_QPEL_FUNCS(3, 3, ssse3);
|
||||
}
|
||||
#if HAVE_YASM
|
||||
else if (bit_depth == 10) {
|
||||
else if (bit_depth == 10 && CONFIG_H264QPEL) {
|
||||
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
|
||||
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
|
||||
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
|
||||
@@ -2788,9 +2794,11 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
|
||||
if (bit_depth == 10) {
|
||||
// AVX implies !cache64.
|
||||
// TODO: Port cache(32|64) detection from x264.
|
||||
if (CONFIG_H264QPEL) {
|
||||
H264_QPEL_FUNCS_10(1, 0, sse2);
|
||||
H264_QPEL_FUNCS_10(2, 0, sse2);
|
||||
H264_QPEL_FUNCS_10(3, 0, sse2);
|
||||
}
|
||||
|
||||
if (CONFIG_H264CHROMA) {
|
||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
|
||||
|
Loading…
Reference in New Issue
Block a user