diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index ecfd332ef..24e5b8b91 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -563,234 +563,162 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Inverse transform if (vpx_config("CONFIG_VP9") eq "yes") { + +add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + specialize qw/vpx_idct4x4_16_add neon sse2/; + + specialize qw/vpx_idct4x4_1_add neon sse2/; + + specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/; + + specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; + + specialize qw/vpx_idct8x8_1_add neon sse2/; + + specialize qw/vpx_idct16x16_256_add neon sse2/; + + specialize qw/vpx_idct16x16_38_add neon sse2/; + $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; + + specialize qw/vpx_idct16x16_10_add neon sse2/; + + specialize qw/vpx_idct16x16_1_add neon sse2/; + + specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/; + + specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; + $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; + + specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; + + specialize qw/vpx_idct32x32_1_add neon sse2/; +} # !CONFIG_EMULATE_HARDWARE + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_16_add sse2/; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct4x4_1_add neon/; + add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + + add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_1_add neon/; + add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + + add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + + add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_1_add neon/; add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_1_add neon sse2/; - add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - # Force C versions if CONFIG_EMULATE_HARDWARE is 1 - if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - } else { - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct4x4_16_add neon sse2/; - - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct4x4_1_add neon sse2/; - - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/; - - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; - - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct8x8_1_add neon sse2/; - - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_256_add neon sse2/; - - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_38_add neon sse2/; - $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; - - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_10_add neon sse2/; - - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_1_add neon sse2/; - - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/; - - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; - # Need to add 135 eob idct32x32 implementations. - $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; - - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; - - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_1_add neon sse2/; - - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_highbd_idct4x4_16_add neon sse2/; - add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_64_add neon sse2/; - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_12_add neon sse2/; - add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_256_add neon sse2/; - add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_38_add neon sse2/; $vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2; - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_10_add neon sse2/; - add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_1024_add neon/; - add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_135_add neon/; - add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_34_add neon/; - } # CONFIG_EMULATE_HARDWARE + } # !CONFIG_EMULATE_HARDWARE } else { - # Force C versions if CONFIG_EMULATE_HARDWARE is 1 - if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + specialize qw/vpx_idct4x4_16_add dspr2 msa/; - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct4x4_1_add dspr2 msa/; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct8x8_64_add dspr2 msa/; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct8x8_12_add dspr2 msa/; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct8x8_1_add dspr2 msa/; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + specialize qw/vpx_idct16x16_256_add dspr2 msa/; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - } else { - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/; - - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/; - - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/; - - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct8x8_64_add sse2 ssse3 neon dspr2 msa/; - - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct8x8_12_add sse2 ssse3 neon dspr2 msa/; - - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/; - - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/; - - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_38_add sse2 neon dspr2 msa/; - $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; + specialize qw/vpx_idct16x16_38_add dspr2 msa/; $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2; $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct16x16_10_add dspr2 msa/; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_1024_add sse2 ssse3 neon dspr2 msa/; + specialize qw/vpx_idct16x16_1_add dspr2 msa/; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_135_add sse2 ssse3 neon dspr2 msa/; - $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; + specialize qw/vpx_idct32x32_1024_add dspr2 msa/; + + specialize qw/vpx_idct32x32_135_add dspr2 msa/; $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_34_add sse2 ssse3 neon dspr2 msa/; + specialize qw/vpx_idct32x32_34_add dspr2 msa/; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct32x32_1_add dspr2 msa/; - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - specialize qw/vpx_iwht4x4_1_add msa/; - - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_16_add msa sse2/; - } # CONFIG_EMULATE_HARDWARE + + specialize qw/vpx_iwht4x4_1_add msa/; + } # !CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9