Allow specifying opt dependencies

If optimizations use more than one cpu feature, allow
specifying them so that '--disable-X' still works

https://code.google.com/p/webm/issues/detail?id=854

Change-Id: I3108ea37b397371a2be84dd5f2380b304db23f18
This commit is contained in:
Johann 2014-09-10 10:27:58 -07:00
parent 89ffda0ddf
commit 8645a53039
4 changed files with 23 additions and 16 deletions

View File

@ -49,7 +49,7 @@ open CONFIG_FILE, $opts{config} or
my %config = (); my %config = ();
while (<CONFIG_FILE>) { while (<CONFIG_FILE>) {
next if !/^CONFIG_/; next if !/^(?:CONFIG_|HAVE_)/;
chomp; chomp;
my @pair = split /=/; my @pair = split /=/;
$config{$pair[0]} = $pair[1]; $config{$pair[0]} = $pair[1];

View File

@ -645,7 +645,7 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(64, 64, &convolve8_ssse3))); make_tuple(64, 64, &convolve8_ssse3)));
#endif #endif
#if HAVE_AVX2 #if HAVE_AVX2 && HAVE_SSSE3
const ConvolveFunctions convolve8_avx2( const ConvolveFunctions convolve8_avx2(
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
@ -665,7 +665,7 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
make_tuple(64, 32, &convolve8_avx2), make_tuple(64, 32, &convolve8_avx2),
make_tuple(32, 64, &convolve8_avx2), make_tuple(32, 64, &convolve8_avx2),
make_tuple(64, 64, &convolve8_avx2))); make_tuple(64, 64, &convolve8_avx2)));
#endif #endif // HAVE_AVX2 && HAVE_SSSE3
#if HAVE_NEON_ASM #if HAVE_NEON_ASM
const ConvolveFunctions convolve8_neon( const ConvolveFunctions convolve8_neon(

View File

@ -45,6 +45,13 @@ if ($opts{arch} eq "x86_64") {
$avx_x86_64 = $avx2_x86_64 = ''; $avx_x86_64 = $avx2_x86_64 = '';
} }
# Some optimizations rely on more than one CPU feature. Offer the avx2
# specialization only when ssse3 is enabled as well; otherwise leave the
# entry empty.
$avx2_ssse3 =
  (vpx_config("HAVE_AVX2") eq "yes" && vpx_config("HAVE_SSSE3") eq "yes")
  ? 'avx2'
  : '';
# #
# RECON # RECON
# #
@ -296,15 +303,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon; $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2 avx2/; specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3";
$vp9_convolve8_neon_asm=vp9_convolve8_neon; $vp9_convolve8_neon_asm=vp9_convolve8_neon;
add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2 avx2/; specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3";
$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon; $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2 avx2/; specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/, "$avx2_ssse3";
$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon; $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

View File

@ -139,25 +139,25 @@ void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
} \ } \
} }
#if HAVE_AVX2 #if HAVE_AVX2 && HAVE_SSSE3
filter8_1dfunction vp9_filter_block1d16_v8_avx2; filter8_1dfunction vp9_filter_block1d16_v8_avx2;
filter8_1dfunction vp9_filter_block1d16_h8_avx2; filter8_1dfunction vp9_filter_block1d16_h8_avx2;
filter8_1dfunction vp9_filter_block1d4_v8_ssse3; filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
#if (ARCH_X86_64) #if ARCH_X86_64
filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
#else #else // ARCH_X86
filter8_1dfunction vp9_filter_block1d8_v8_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
#endif #endif // ARCH_X86_64 / ARCH_X86
filter8_1dfunction vp9_filter_block1d16_v2_ssse3; filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
filter8_1dfunction vp9_filter_block1d16_h2_ssse3; filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
filter8_1dfunction vp9_filter_block1d8_v2_ssse3; filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
@ -190,9 +190,9 @@ FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
// const int16_t *filter_y, int y_step_q4, // const int16_t *filter_y, int y_step_q4,
// int w, int h); // int w, int h);
FUN_CONV_2D(, avx2); FUN_CONV_2D(, avx2);
#endif #endif // HAVE_AVX2 && HAVE_SSSE3
#if HAVE_SSSE3 #if HAVE_SSSE3
#if (ARCH_X86_64) #if ARCH_X86_64
filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
@ -204,14 +204,14 @@ filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
#else #else // ARCH_X86
filter8_1dfunction vp9_filter_block1d16_v8_ssse3; filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
filter8_1dfunction vp9_filter_block1d8_h8_ssse3; filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
filter8_1dfunction vp9_filter_block1d4_v8_ssse3; filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_ssse3; filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
#endif #endif // ARCH_X86_64 / ARCH_X86
filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
@ -270,7 +270,7 @@ FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
// int w, int h); // int w, int h);
FUN_CONV_2D(, ssse3); FUN_CONV_2D(, ssse3);
FUN_CONV_2D(avg_ , ssse3); FUN_CONV_2D(avg_ , ssse3);
#endif #endif // HAVE_SSSE3
#if HAVE_SSE2 #if HAVE_SSE2
filter8_1dfunction vp9_filter_block1d16_v8_sse2; filter8_1dfunction vp9_filter_block1d16_v8_sse2;
@ -336,4 +336,4 @@ FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
// int w, int h); // int w, int h);
FUN_CONV_2D(, sse2); FUN_CONV_2D(, sse2);
FUN_CONV_2D(avg_ , sse2); FUN_CONV_2D(avg_ , sse2);
#endif #endif // HAVE_SSE2