enable idct*_1_add_neon in high-bitdepth builds
These are compatible because they load only one element of the input, so the larger size of tran_low_t makes no difference in little-endian builds. Note that the asm is incompatible with big-endian, but there are other points of failure there, so big-endian is currently considered unsupported. BUG=webm:1294 Change-Id: Icd2665a0699bccae92d1bea43a95b0a83fb17028
This commit is contained in:
parent
efb56ec3ff
commit
a6be7ba1aa
@ -201,7 +201,19 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
|
||||
&vpx_idct4x4_1_add_c, TX_4X4, 1)));
|
||||
|
||||
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, PartialIDctTest,
|
||||
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
|
||||
&vpx_idct32x32_1_add_neon, TX_32X32, 1),
|
||||
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
|
||||
&vpx_idct16x16_1_add_neon, TX_16X16, 1),
|
||||
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
|
||||
&vpx_idct8x8_1_add_neon, TX_8X8, 1),
|
||||
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
|
||||
&vpx_idct4x4_1_add_neon, TX_4X4, 1)));
|
||||
#else // !CONFIG_VP9_HIGHBITDEPTH
|
||||
// 32x32_34_ 32x32_135_ are implemented using the 1024 version.
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, PartialIDctTest,
|
||||
@ -229,7 +241,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&vpx_idct4x4_16_add_neon, TX_4X4, 16),
|
||||
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
|
||||
&vpx_idct4x4_1_add_neon, TX_4X4, 1)));
|
||||
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
// 32x32_135_ is implemented using the 1024 version.
|
||||
|
@ -199,23 +199,15 @@ DSP_SRCS-$(HAVE_NEON_ASM) += arm/save_reg_neon$(ASM)
|
||||
|
||||
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
|
||||
ifeq ($(HAVE_NEON_ASM),yes)
|
||||
DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct8x8_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct16x16_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct16x16_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct32x32_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct32x32_add_neon$(ASM)
|
||||
else
|
||||
ifeq ($(HAVE_NEON),yes)
|
||||
DSP_SRCS-yes += arm/idct4x4_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct4x4_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct8x8_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct8x8_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct16x16_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct16x16_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct32x32_1_add_neon.c
|
||||
DSP_SRCS-yes += arm/idct32x32_add_neon.c
|
||||
endif # HAVE_NEON
|
||||
endif # HAVE_NEON_ASM
|
||||
@ -233,7 +225,20 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/itrans8_dspr2.c
|
||||
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans16_dspr2.c
|
||||
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_dspr2.c
|
||||
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_cols_dspr2.c
|
||||
endif # CONFIG_VP9_HIGHBITDEPTH
|
||||
endif # !CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
ifeq ($(HAVE_NEON_ASM),yes)
|
||||
DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct16x16_1_add_neon$(ASM)
|
||||
DSP_SRCS-yes += arm/idct32x32_1_add_neon$(ASM)
|
||||
else
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/idct4x4_1_add_neon.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/idct8x8_1_add_neon.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_1_add_neon.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_1_add_neon.c
|
||||
endif # HAVE_NEON_ASM
|
||||
|
||||
endif # CONFIG_VP9
|
||||
|
||||
# quantization
|
||||
|
@ -647,7 +647,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_idct4x4_16_add sse2/;
|
||||
|
||||
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct4x4_1_add sse2/;
|
||||
specialize qw/vpx_idct4x4_1_add neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64";
|
||||
@ -656,7 +656,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64";
|
||||
|
||||
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct8x8_1_add sse2/;
|
||||
specialize qw/vpx_idct8x8_1_add neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct16x16_256_add sse2/;
|
||||
@ -665,7 +665,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_idct16x16_10_add sse2/;
|
||||
|
||||
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct16x16_1_add sse2/;
|
||||
specialize qw/vpx_idct16x16_1_add neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64";
|
||||
@ -679,7 +679,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64";
|
||||
|
||||
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vpx_idct32x32_1_add sse2/;
|
||||
specialize qw/vpx_idct32x32_1_add neon sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vpx_highbd_idct4x4_16_add sse2/;
|
||||
|
Loading…
x
Reference in New Issue
Block a user