From a7a57d975626308d1fb3b88dc9ab937508521efa Mon Sep 17 00:00:00 2001 From: Linfeng Zhang Date: Tue, 2 May 2017 11:19:44 -0700 Subject: [PATCH 1/3] Clean add_protos of idct functions Change-Id: I6037525d92ec172810edab720389eb1865ed3b1a --- vpx_dsp/vpx_dsp_rtcd_defs.pl | 123 ++++++++++------------------------- 1 file changed, 33 insertions(+), 90 deletions(-) diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index a93e95ebe..d200a01bb 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -563,12 +563,40 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Inverse transform if (vpx_config("CONFIG_VP9") eq "yes") { + +add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + +add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_16_add sse2/; add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; @@ -593,32 +621,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Force C versions if CONFIG_EMULATE_HARDWARE is 1 if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; @@ -637,46 +639,33 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; } else { - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_16_add neon sse2/; - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_1_add neon sse2/; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_1_add neon sse2/; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_256_add neon sse2/; - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_38_add neon sse2/; $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_10_add neon sse2/; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_1_add neon sse2/; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; # Need to add 135 eob idct32x32 implementations. $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1_add neon sse2/; add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; @@ -708,89 +697,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_idct32x32_34_add neon/; } # CONFIG_EMULATE_HARDWARE } else { - # Force C versions if CONFIG_EMULATE_HARDWARE is 1 - if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; - } else { - add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; + if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_64_add sse2 ssse3 neon dspr2 msa/; - add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct8x8_12_add sse2 ssse3 neon dspr2 msa/; - add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_38_add sse2 neon dspr2 msa/; $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2; $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa; - add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1024_add sse2 ssse3 neon dspr2 msa/; - add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_135_add sse2 ssse3 neon dspr2 msa/; $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; - add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_34_add sse2 ssse3 neon dspr2 msa/; - add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/; - add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_1_add msa/; - add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride"; specialize qw/vpx_iwht4x4_16_add msa sse2/; - } # CONFIG_EMULATE_HARDWARE + } # !CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9 From 4412996d5989104f34f200b0bf87aed5bd41ed05 Mon Sep 17 00:00:00 2001 From: Linfeng Zhang Date: Tue, 2 May 2017 13:31:56 -0700 Subject: [PATCH 2/3] Clean add_protos of highbd idct functions Change-Id: Ica51d780b92b316ce9112740c56cdf7670816371 --- vpx_dsp/vpx_dsp_rtcd_defs.pl | 48 +++++++++++++----------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index d200a01bb..7df8a1eac 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -599,46 +599,41 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. specialize qw/vpx_iwht4x4_16_add sse2/; + add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct4x4_1_add neon/; + add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + + add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_1_add neon/; + add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + + add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + + add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_1_add neon/; add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_1_add neon sse2/; - add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - # Force C versions if CONFIG_EMULATE_HARDWARE is 1 - if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") { - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; + add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - - add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; - } else { + if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_idct4x4_16_add neon sse2/; specialize qw/vpx_idct4x4_1_add neon sse2/; @@ -668,34 +663,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_idct32x32_1_add neon sse2/; - add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct4x4_16_add neon sse2/; - add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_64_add neon sse2/; - add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct8x8_12_add neon sse2/; - add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_256_add neon sse2/; - add_proto qw/void vpx_highbd_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_38_add neon sse2/; $vpx_highbd_idct16x16_38_add_sse2=vpx_highbd_idct16x16_256_add_sse2; - add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct16x16_10_add neon sse2/; - add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_1024_add neon/; - add_proto qw/void vpx_highbd_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_135_add neon/; - add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; specialize qw/vpx_highbd_idct32x32_34_add neon/; - } # CONFIG_EMULATE_HARDWARE + } # !CONFIG_EMULATE_HARDWARE } else { if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/; From 0178d974e529c0e7592ccd1be926d91b8935eb0d Mon Sep 17 00:00:00 2001 From: Linfeng Zhang Date: Tue, 2 May 2017 14:14:06 -0700 Subject: [PATCH 3/3] Clean specializes of idct functions Change-Id: I8bb660de47b5f97263ec381dc428db96e9c9a4b2 --- vpx_dsp/vpx_dsp_rtcd_defs.pl | 93 ++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 7df8a1eac..d5fb3e0cb 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -594,6 +594,36 @@ add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride"; +if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + specialize qw/vpx_idct4x4_16_add neon sse2/; + + specialize qw/vpx_idct4x4_1_add neon sse2/; + + specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/; + + specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; + + specialize qw/vpx_idct8x8_1_add neon sse2/; + + specialize qw/vpx_idct16x16_256_add neon sse2/; + + specialize qw/vpx_idct16x16_38_add neon sse2/; + $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; + + specialize qw/vpx_idct16x16_10_add neon sse2/; + + specialize qw/vpx_idct16x16_1_add neon sse2/; + + specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/; + + specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; + $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; + + specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; + + specialize qw/vpx_idct32x32_1_add neon sse2/; +} # !CONFIG_EMULATE_HARDWARE + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. @@ -634,35 +664,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride, int bd"; if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { - specialize qw/vpx_idct4x4_16_add neon sse2/; - - specialize qw/vpx_idct4x4_1_add neon sse2/; - - specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/; - - specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; - - specialize qw/vpx_idct8x8_1_add neon sse2/; - - specialize qw/vpx_idct16x16_256_add neon sse2/; - - specialize qw/vpx_idct16x16_38_add neon sse2/; - $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; - - specialize qw/vpx_idct16x16_10_add neon sse2/; - - specialize qw/vpx_idct16x16_1_add neon sse2/; - - specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/; - - specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; - # Need to add 135 eob idct32x32 implementations. - $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; - - specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; - - specialize qw/vpx_idct32x32_1_add neon sse2/; - specialize qw/vpx_highbd_idct4x4_16_add neon sse2/; specialize qw/vpx_highbd_idct8x8_64_add neon sse2/; @@ -684,41 +685,39 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { } # !CONFIG_EMULATE_HARDWARE } else { if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { - specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct4x4_16_add dspr2 msa/; - specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct4x4_1_add dspr2 msa/; - specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct8x8_64_add dspr2 msa/; - specialize qw/vpx_idct8x8_64_add sse2 ssse3 neon dspr2 msa/; + specialize qw/vpx_idct8x8_12_add dspr2 msa/; - specialize qw/vpx_idct8x8_12_add sse2 ssse3 neon dspr2 msa/; + specialize qw/vpx_idct8x8_1_add dspr2 msa/; - specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct16x16_256_add dspr2 msa/; - specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/; - - specialize qw/vpx_idct16x16_38_add sse2 neon dspr2 msa/; - $vpx_idct16x16_38_add_sse2=vpx_idct16x16_256_add_sse2; + specialize qw/vpx_idct16x16_38_add dspr2 msa/; $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2; $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa; - specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/; + specialize qw/vpx_idct16x16_10_add dspr2 msa/; - specialize qw/vpx_idct32x32_1024_add sse2 ssse3 neon dspr2 msa/; + specialize qw/vpx_idct16x16_1_add dspr2 msa/; - specialize qw/vpx_idct32x32_135_add sse2 ssse3 neon dspr2 msa/; - $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2; + specialize qw/vpx_idct32x32_1024_add dspr2 msa/; + + specialize qw/vpx_idct32x32_135_add dspr2 msa/; $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2; $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; - specialize qw/vpx_idct32x32_34_add sse2 ssse3 neon dspr2 msa/; + specialize qw/vpx_idct32x32_34_add dspr2 msa/; - specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/; - - specialize qw/vpx_iwht4x4_1_add msa/; + specialize qw/vpx_idct32x32_1_add dspr2 msa/; specialize qw/vpx_iwht4x4_16_add msa sse2/; + + specialize qw/vpx_iwht4x4_1_add msa/; } # !CONFIG_EMULATE_HARDWARE } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9