Merge changes I8bb660de,Ica51d780,I6037525d

* changes:
  Clean specializes of idct functions
  Clean add_protos of highbd idct functions
  Clean add_protos of idct functions
This commit is contained in:
Linfeng Zhang 2017-05-03 19:17:55 +00:00 committed by Gerrit Code Review
commit a10a5cb356

View File

@ -563,234 +563,162 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Inverse transform
if (vpx_config("CONFIG_VP9") eq "yes") {
add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_idct4x4_16_add neon sse2/;
specialize qw/vpx_idct4x4_1_add neon sse2/;
specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/;
specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
specialize qw/vpx_idct8x8_1_add neon sse2/;
specialize qw/vpx_idct16x16_256_add neon sse2/;
specialize qw/vpx_idct16x16_38_add neon sse2/;
$vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
specialize qw/vpx_idct16x16_10_add neon sse2/;
specialize qw/vpx_idct16x16_1_add neon sse2/;
specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
$vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_1_add neon sse2/;
} # !CONFIG_EMULATE_HARDWARE
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_iwht4x4_16_add sse2/;
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct4x4_1_add neon/;
add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct8x8_1_add neon/;
add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_1_add neon/;
add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_1_add neon sse2/;
add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
} else {
add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct4x4_16_add neon sse2/;
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct4x4_1_add neon sse2/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/;
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_1_add neon sse2/;
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_256_add neon sse2/;
add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_38_add neon sse2/;
$vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_10_add neon sse2/;
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_1_add neon sse2/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/;
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
# Need to add 135 eob idct32x32 implementations.
$vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_1_add neon sse2/;
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_highbd_idct4x4_16_add neon sse2/;
add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct8x8_64_add neon sse2/;
add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct8x8_12_add neon sse2/;
add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_256_add neon sse2/;
add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_38_add neon sse2/;
$vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2;
add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct16x16_10_add neon sse2/;
add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_1024_add neon/;
add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_135_add neon/;
add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd";
specialize qw/vpx_highbd_idct32x32_34_add neon/;
} # CONFIG_EMULATE_HARDWARE
} # !CONFIG_EMULATE_HARDWARE
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_idct4x4_16_add dspr2 msa/;
add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct4x4_1_add dspr2 msa/;
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_64_add dspr2 msa/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_12_add dspr2 msa/;
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_1_add dspr2 msa/;
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_256_add dspr2 msa/;
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
} else {
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_64_add sse2 ssse3 neon dspr2 msa/;
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct8x8_12_add sse2 ssse3 neon dspr2 msa/;
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_38_add sse2 neon dspr2 msa/;
$vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2;
specialize qw/vpx_idct16x16_38_add dspr2 msa/;
$vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2;
$vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa;
add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;
specialize qw/vpx_idct16x16_10_add dspr2 msa/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_1024_add sse2 ssse3 neon dspr2 msa/;
specialize qw/vpx_idct16x16_1_add dspr2 msa/;
add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_135_add sse2 ssse3 neon dspr2 msa/;
$vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
specialize qw/vpx_idct32x32_1024_add dspr2 msa/;
specialize qw/vpx_idct32x32_135_add dspr2 msa/;
$vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2;
$vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_34_add sse2 ssse3 neon dspr2 msa/;
specialize qw/vpx_idct32x32_34_add dspr2 msa/;
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/;
specialize qw/vpx_idct32x32_1_add dspr2 msa/;
add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_iwht4x4_1_add msa/;
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride";
specialize qw/vpx_iwht4x4_16_add msa sse2/;
} # CONFIG_EMULATE_HARDWARE
specialize qw/vpx_iwht4x4_1_add msa/;
} # !CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9