From 45a7b5ebd7fcf7b329710e3f347ce40bd2bf6a84 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 12 Feb 2016 19:32:05 -0800 Subject: [PATCH 01/16] lpf_8_test: simplify function wrapper generation Change-Id: Ie4d3e80a4e43dd4ada78d073e308e10db4ea3239 --- test/lpf_8_test.cc | 159 ++++++++++++--------------------------------- 1 file changed, 43 insertions(+), 116 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 0bf6b0c23..c582bc34d 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -42,6 +42,17 @@ typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); + +// wrapper for loopfilter functions without a 'count' param. +typedef void (*loop_op_nc_t)(uint16_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int bd); +template <loop_op_nc_t fn> +void wrapper_nc(uint16_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int /*count*/, int bd) { + fn(s, p, blimit, limit, thresh, bd); +} #else typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, @@ -50,107 +61,21 @@ typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); + +// wrapper for loopfilter functions without a 'count' param. 
+typedef void (*loop_op_nc_t)(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh); +template <loop_op_nc_t fn> +void wrapper_nc(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int /*count*/) { + fn(s, p, blimit, limit, thresh); +} #endif // CONFIG_VP9_HIGHBITDEPTH typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t; typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t; -#if HAVE_SSE2 -#if CONFIG_VP9_HIGHBITDEPTH -void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd); -} - -void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd); -} - -void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd); -} - -void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { - vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd); -} -#else -void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t 
*blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); -} -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // HAVE_SSE2 - -#if HAVE_NEON_ASM -#if CONFIG_VP9_HIGHBITDEPTH -// No neon high bitdepth functions. -#else -void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh); -} -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // HAVE_NEON_ASM - -#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) -void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh); -} - -void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { - vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh); -} -#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) - class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { public: virtual ~Loop8Test6Param() {} @@ -546,8 +471,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, &vpx_highbd_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_sse2, - &wrapper_vertical_16_c, 8, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, + &wrapper_nc<vpx_highbd_lpf_vertical_16_c>, 8, 
1), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 10, 1), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, @@ -560,8 +485,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_16_c, 10, 2), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, &vpx_highbd_lpf_vertical_8_c, 10, 1), - make_tuple(&wrapper_vertical_16_sse2, - &wrapper_vertical_16_c, 10, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, + &wrapper_nc<vpx_highbd_lpf_vertical_16_c>, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 12, 1), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, @@ -574,14 +499,14 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_16_c, 12, 2), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, &vpx_highbd_lpf_vertical_8_c, 12, 1), - make_tuple(&wrapper_vertical_16_sse2, - &wrapper_vertical_16_c, 12, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 8, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 10, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 12, 1))); + make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, + &wrapper_nc<vpx_highbd_lpf_vertical_16_c>, 12, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_dual_sse2>, + &wrapper_nc<vpx_highbd_lpf_vertical_16_dual_c>, 8, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_dual_sse2>, + &wrapper_nc<vpx_highbd_lpf_vertical_16_dual_c>, 10, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_dual_sse2>, + &wrapper_nc<vpx_highbd_lpf_vertical_16_dual_c>, 12, 1))); #else INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, @@ -590,9 +515,10 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1), - make_tuple(&wrapper_vertical_16_dual_sse2, - &wrapper_vertical_16_dual_c, 8, 1))); + make_tuple(&wrapper_nc<vpx_lpf_vertical_16_sse2>, + &wrapper_nc<vpx_lpf_vertical_16_c>, 8, 1), + make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dual_sse2>, + &wrapper_nc<vpx_lpf_vertical_16_dual_c>, 8, 1))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif @@ -663,10 +589,10 @@ INSTANTIATE_TEST_CASE_P( &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_neon, 
&vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&wrapper_vertical_16_neon, - &wrapper_vertical_16_c, 8, 1), - make_tuple(&wrapper_vertical_16_dual_neon, - &wrapper_vertical_16_dual_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), #endif // HAVE_NEON_ASM make_tuple(&vpx_lpf_horizontal_8_neon, &vpx_lpf_horizontal_8_c, 8, 1), @@ -700,7 +626,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1), - make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1))); + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); INSTANTIATE_TEST_CASE_P( MSA, Loop8Test9Param, From c3f2c8ad2a00453b63cf3fab89968ad10d9d616b Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 12 Feb 2016 20:23:41 -0800 Subject: [PATCH 02/16] lpf_8_test: add missing vpx_lpf_vertical_4 tests mmx, msa Change-Id: I113ce0ec144ee673d5dcde4c03fe7670f9f4c369 --- test/lpf_8_test.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index c582bc34d..45cd6618c 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -454,6 +454,13 @@ TEST_P(Loop8Test9Param, ValueCheck) { using std::tr1::make_tuple; +#if HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + MMX, Loop8Test6Param, + ::testing::Values( + make_tuple(&vpx_lpf_vertical_4_mmx, &vpx_lpf_vertical_4_c, 8, 1))); +#endif // HAVE_MMX + #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( @@ -625,6 +632,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8, 1), make_tuple(&vpx_lpf_vertical_8_msa, 
&vpx_lpf_vertical_8_c, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1))); From 4fec4a8e288cebeb265996e54d2cb4cd000bb38b Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 12 Feb 2016 20:25:15 -0800 Subject: [PATCH 03/16] lpf_8_test: add missing vpx_lpf_horizontal_4 tests mmx, msa Change-Id: Ia9604adcdcc77411f383e081e01a18d232c9d992 --- test/lpf_8_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 45cd6618c..58bcb05d1 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -458,6 +458,7 @@ using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( MMX, Loop8Test6Param, ::testing::Values( + make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1), make_tuple(&vpx_lpf_vertical_4_mmx, &vpx_lpf_vertical_4_c, 8, 1))); #endif // HAVE_MMX @@ -629,6 +630,7 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( + make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1), make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), From 47dee375db30e803a9ff8c0694040b1eb819add7 Mon Sep 17 00:00:00 2001 From: James Zern Date: Sat, 13 Feb 2016 10:24:26 -0800 Subject: [PATCH 04/16] lpf_8_test: add missing dspr2 tests Change-Id: I3954ff86ec1965cd6d4eec570c2d1993538d9c11 --- test/lpf_8_test.cc | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 58bcb05d1..3f73a996f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -626,6 +626,36 @@ INSTANTIATE_TEST_CASE_P( #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON +#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + DSPR2, Loop8Test6Param, + ::testing::Values( + make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1), + 
make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_dspr2, + &vpx_lpf_horizontal_16_c, 8, 1), + make_tuple(&vpx_lpf_horizontal_16_dspr2, + &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8, 1), + make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); + +INSTANTIATE_TEST_CASE_P( + DSPR2, Loop8Test9Param, + ::testing::Values( + make_tuple(&vpx_lpf_horizontal_4_dual_dspr2, + &vpx_lpf_horizontal_4_dual_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dual_dspr2, + &vpx_lpf_horizontal_8_dual_c, 8), + make_tuple(&vpx_lpf_vertical_4_dual_dspr2, + &vpx_lpf_vertical_4_dual_c, 8), + make_tuple(&vpx_lpf_vertical_8_dual_dspr2, + &vpx_lpf_vertical_8_dual_c, 8))); +#endif // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH + #if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, From 37225744dbf30d79711fa9ef182d2007a51b11bd Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 19:43:36 -0800 Subject: [PATCH 05/16] vpx_lpf_vertical_8: remove unused count param Change-Id: Ic69406da00afb0f06588e8c0deb2b043952b078c --- test/lpf_8_test.cc | 13 ++++++++----- vp10/common/loopfilter.c | 7 +++---- vp9/common/vp9_loopfilter.c | 7 +++---- vpx_dsp/arm/loopfilter_8_neon.asm | 14 +------------- vpx_dsp/arm/loopfilter_8_neon.c | 8 ++------ vpx_dsp/arm/loopfilter_neon.c | 4 ++-- vpx_dsp/loopfilter.c | 10 ++++------ vpx_dsp/mips/loopfilter_8_msa.c | 5 +---- vpx_dsp/mips/loopfilter_filters_dspr2.c | 5 ++--- vpx_dsp/mips/loopfilter_mb_dspr2.c | 3 +-- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/loopfilter_sse2.c | 3 +-- 12 files changed, 29 insertions(+), 52 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 3f73a996f..9697c88b9 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -522,7 +522,8 @@ INSTANTIATE_TEST_CASE_P( 
make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -604,8 +605,8 @@ INSTANTIATE_TEST_CASE_P( #endif // HAVE_NEON_ASM make_tuple(&vpx_lpf_horizontal_8_neon, &vpx_lpf_horizontal_8_c, 8, 1), - make_tuple(&vpx_lpf_vertical_8_neon, - &vpx_lpf_vertical_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8, 1), make_tuple(&vpx_lpf_vertical_4_neon, @@ -637,7 +638,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8, 1), - make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -665,7 +667,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8, 1), - make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1))); diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index a1925de55..4171c1e08 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -345,11 +345,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_8x8_0 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + vpx_lpf_vertical_8(s, pitch, 
lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } @@ -1127,7 +1126,7 @@ static void filter_selectively_vert(uint8_t *s, int pitch, if (mask_16x16 & 1) { vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 79c3c4820..8c281c2ec 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -345,11 +345,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_8x8_0 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } @@ -1102,7 +1101,7 @@ static void filter_selectively_vert(uint8_t *s, int pitch, if (mask_16x16 & 1) { vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } diff --git a/vpx_dsp/arm/loopfilter_8_neon.asm b/vpx_dsp/arm/loopfilter_8_neon.asm index e81734c04..61cabe8e8 100644 --- a/vpx_dsp/arm/loopfilter_8_neon.asm +++ b/vpx_dsp/arm/loopfilter_8_neon.asm @@ -82,30 +82,24 @@ end_vpx_mblf_h_edge ; int pitch, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; 
; r0 uint8_t *s, ; r1 int pitch, ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_vertical_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #16] ; load count vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #12] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns - cmp r12, #0 - beq end_vpx_mblf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh -count_mblf_v_loop vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 @@ -156,12 +150,6 @@ count_mblf_v_loop vst2.8 {d4[6], d5[6]}, [r3], r1 vst2.8 {d4[7], d5[7]}, [r3] - add r0, r0, r1, lsl #3 ; s += pitch * 8 - subs r12, r12, #1 - subne r2, r0, #4 ; move s pointer down by 4 columns - bne count_mblf_v_loop - -end_vpx_mblf_v_edge pop {r4-r5, pc} ENDP ; |vpx_lpf_vertical_8_neon| diff --git a/vpx_dsp/arm/loopfilter_8_neon.c b/vpx_dsp/arm/loopfilter_8_neon.c index a887e2ee5..3c005700f 100644 --- a/vpx_dsp/arm/loopfilter_8_neon.c +++ b/vpx_dsp/arm/loopfilter_8_neon.c @@ -328,8 +328,7 @@ void vpx_lpf_vertical_8_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; @@ -341,14 +340,11 @@ void vpx_lpf_vertical_8_neon( uint8x8x4_t d4Result; uint8x8x2_t d2Result; - if (count == 0) - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = src + (i * (pitch << 3)) - 4; d3u8 = vld1_u8(s); diff --git a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c index eff87d29b..581410541 100644 --- a/vpx_dsp/arm/loopfilter_neon.c +++ b/vpx_dsp/arm/loopfilter_neon.c @@ -44,8 +44,8 @@ void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1); - 
vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 66f4d9576..fdb5dbbab 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -218,11 +218,10 @@ void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, } void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, @@ -238,9 +237,8 @@ void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1); + vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter16(int8_t mask, uint8_t thresh, diff --git a/vpx_dsp/mips/loopfilter_8_msa.c b/vpx_dsp/mips/loopfilter_8_msa.c index 00b6db550..ec3f5dd22 100644 --- a/vpx_dsp/mips/loopfilter_8_msa.c +++ b/vpx_dsp/mips/loopfilter_8_msa.c @@ -161,8 +161,7 @@ void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch, void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p1_out, p0_out, q0_out, 
q1_out; v16u8 flat, mask, hev, thresh, b_limit, limit; @@ -171,8 +170,6 @@ void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, v16u8 zero = { 0 }; v8i16 vec0, vec1, vec2, vec3, vec4; - (void)count; - /* load vector elements */ LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3); diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index 99a96d89b..529df4ee6 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -346,9 +346,8 @@ void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, - 1); + vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, diff --git a/vpx_dsp/mips/loopfilter_mb_dspr2.c b/vpx_dsp/mips/loopfilter_mb_dspr2.c index 4138f5697..5bbf091c8 100644 --- a/vpx_dsp/mips/loopfilter_mb_dspr2.c +++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c @@ -322,8 +322,7 @@ void vpx_lpf_vertical_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask, hev, flat; uint8_t *s1, *s2, *s3, *s4; diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 73726d217..feaf0ae7e 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -535,7 +535,7 @@ add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8 specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon; -add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto 
qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/; add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c index ed1012736..086d075fa 100644 --- a/vpx_dsp/x86/loopfilter_sse2.c +++ b/vpx_dsp/x86/loopfilter_sse2.c @@ -1492,11 +1492,10 @@ void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, - const unsigned char *thresh, int count) { + const unsigned char *thresh) { DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); unsigned char *src[1]; unsigned char *dst[1]; - (void)count; // Transpose 8x8 src[0] = s - 4; From 109a47b3426d302df201295aeff9cf0e40badf69 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 19:54:51 -0800 Subject: [PATCH 06/16] vpx_lpf_vertical_4: remove unused count param Change-Id: I43a191cb3d42e51e7bca266adfa11c6239a8064c --- test/lpf_8_test.cc | 13 ++++++++----- vp10/common/loopfilter.c | 13 ++++++------- vp9/common/vp9_loopfilter.c | 13 ++++++------- vpx_dsp/arm/loopfilter_4_neon.asm | 16 +--------------- vpx_dsp/arm/loopfilter_4_neon.c | 8 ++------ vpx_dsp/arm/loopfilter_neon.c | 4 ++-- vpx_dsp/loopfilter.c | 10 ++++------ vpx_dsp/mips/loopfilter_4_msa.c | 5 +---- vpx_dsp/mips/loopfilter_filters_dspr2.c | 7 +++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/loopfilter_mmx.asm | 11 ++--------- 11 files changed, 36 insertions(+), 66 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 9697c88b9..5c83f3a1f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -459,7 +459,8 @@ INSTANTIATE_TEST_CASE_P( MMX, Loop8Test6Param, 
::testing::Values( make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_lpf_vertical_4_mmx, &vpx_lpf_vertical_4_c, 8, 1))); + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); #endif // HAVE_MMX #if HAVE_SSE2 @@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_lpf_vertical_4_neon, - &vpx_lpf_vertical_4_c, 8, 1))); + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values( @@ -637,7 +638,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 4171c1e08..9f55dc248 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } @@ -373,10 +372,10 @@ static void 
filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } } @@ -1128,11 +1127,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch, } else if (mask_8x8 & 1) { vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 8c281c2ec..e892f78d0 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi1->hev_thr); } else if (mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } @@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + lfi0->hev_thr); } else { vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + lfi1->hev_thr); } } } @@ -1103,11 +1102,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch, } else if (mask_8x8 & 1) { vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, 
lfi->hev_thr); } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm index e45e34cd4..d794f552a 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.asm +++ b/vpx_dsp/arm/loopfilter_4_neon.asm @@ -79,37 +79,29 @@ end_vpx_lf_h_edge ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. ; ; void vpx_lpf_vertical_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_vertical_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #8] ; load count vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #4] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns - cmp r12, #0 - beq end_vpx_lf_v_edge vld1.8 {d2[]}, [r3] ; duplicate *thresh -count_lf_v_loop vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 @@ -149,12 +141,6 @@ count_lf_v_loop vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1 vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0] - add r0, r0, r1, lsl #3 ; s += pitch * 8 - subs r12, r12, #1 - subne r2, r0, #4 ; move s pointer down by 4 columns - bne count_lf_v_loop - -end_vpx_lf_v_edge pop {pc} ENDP ; |vpx_lpf_vertical_4_neon| diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c index 
7ad411aea..db9ea6a9d 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.c +++ b/vpx_dsp/arm/loopfilter_4_neon.c @@ -170,8 +170,7 @@ void vpx_lpf_vertical_4_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i, pitch8; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; @@ -181,15 +180,12 @@ void vpx_lpf_vertical_4_neon( uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x4_t d4Result; - if (count == 0) // end_vpx_lf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); pitch8 = pitch * 8; - for (i = 0; i < count; i++, src += pitch8) { + for (i = 0; i < 1; i++, src += pitch8) { s = src - (i + 1) * 4; d3u8 = vld1_u8(s); diff --git a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c index 581410541..b01944ebb 100644 --- a/vpx_dsp/arm/loopfilter_neon.c +++ b/vpx_dsp/arm/loopfilter_neon.c @@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1); } #if HAVE_NEON_ASM diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index fdb5dbbab..1604fdbcf 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -143,13 +143,12 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, @@ -163,9 +162,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1); + vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c index daf5f38bf..ebeaddd21 100644 --- a/vpx_dsp/mips/loopfilter_4_msa.c +++ b/vpx_dsp/mips/loopfilter_4_msa.c @@ -74,14 +74,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { v16u8 mask, hev, flat, limit, thresh, b_limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v8i16 vec0, vec1, vec2, vec3; - (void)count; - LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index 529df4ee6..9924982f1 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -117,8 +117,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; @@ -335,8 +334,8 @@ 
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); + vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index feaf0ae7e..eeb03b671 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -542,7 +542,7 @@ add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_ specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon; -add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/; add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm index b9c18b680..dee565ce0 100644 --- a/vpx_dsp/x86/loopfilter_mmx.asm +++ b/vpx_dsp/x86/loopfilter_mmx.asm @@ -230,14 +230,13 @@ sym(vpx_lpf_horizontal_4_mmx): ; int src_pixel_step, ; const char *blimit, ; const char *limit, -; const char *thresh, -; int count +; const char *thresh ;) global sym(vpx_lpf_vertical_4_mmx) PRIVATE sym(vpx_lpf_vertical_4_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi @@ -254,8 +253,6 @@ sym(vpx_lpf_vertical_4_mmx): lea rsi, [rsi + 
rax*4 - 4] - movsxd rcx, dword ptr arg(5) ;count -.next8_v: mov rdi, rsi ; rdi points to row +1 for indirect addressing add rdi, rax @@ -579,10 +576,6 @@ sym(vpx_lpf_vertical_4_mmx): movd [rdi+rax*2+2], mm5 - lea rsi, [rsi+rax*8] - dec rcx - jnz .next8_v - add rsp, 64 pop rsp ; begin epilog From bd5a5bb561845a6d6a2a0295d7681be09a66ec48 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:02:53 -0800 Subject: [PATCH 07/16] vpx_lpf_horizontal_8: remove unused count param Change-Id: I48741e167a7b09b7c9ad3bfc1c4b88ef1029ae46 --- test/lpf_8_test.cc | 13 ++++++++----- vp10/common/loopfilter.c | 2 +- vp9/common/vp9_loopfilter.c | 2 +- vpx_dsp/arm/loopfilter_8_neon.asm | 16 +--------------- vpx_dsp/arm/loopfilter_8_neon.c | 8 ++------ vpx_dsp/arm/loopfilter_neon.c | 4 ++-- vpx_dsp/loopfilter.c | 9 ++++----- vpx_dsp/mips/loopfilter_8_msa.c | 5 +---- vpx_dsp/mips/loopfilter_filters_dspr2.c | 4 ++-- vpx_dsp/mips/loopfilter_mb_dspr2.c | 3 +-- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/loopfilter_sse2.c | 6 ++---- 12 files changed, 26 insertions(+), 48 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 5c83f3a1f..394360e2f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -520,7 +520,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&wrapper_nc, @@ -604,8 +605,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), #endif // HAVE_NEON_ASM - make_tuple(&vpx_lpf_horizontal_8_neon, - &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_4_neon, @@ -633,7 +634,8 @@ INSTANTIATE_TEST_CASE_P( DSPR2, 
Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_dspr2, @@ -665,7 +667,8 @@ INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 9f55dc248..6bbf191ac 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -542,7 +542,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } count = 2; } else { - vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index e892f78d0..d5431c2c2 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -542,7 +542,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } count = 2; } else { - vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, diff --git a/vpx_dsp/arm/loopfilter_8_neon.asm b/vpx_dsp/arm/loopfilter_8_neon.asm index 61cabe8e8..a2f20e15f 100644 --- a/vpx_dsp/arm/loopfilter_8_neon.asm +++ b/vpx_dsp/arm/loopfilter_8_neon.asm @@ -16,35 
+16,26 @@ ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. ; ; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_horizontal_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #16] ; load count ldr r2, [sp, #12] ; load thresh add r1, r1, r1 ; double pitch - cmp r12, #0 - beq end_vpx_mblf_h_edge - vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh -count_mblf_h_loop sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines add r2, r3, r1, lsr #1 ; set to 3 lines down @@ -69,11 +60,6 @@ count_mblf_h_loop vst1.u8 {d4}, [r2@64], r1 ; store oq1 vst1.u8 {d5}, [r3@64], r1 ; store oq2 - add r0, r0, #8 - subs r12, r12, #1 - bne count_mblf_h_loop - -end_vpx_mblf_h_edge pop {r4-r5, pc} ENDP ; |vpx_lpf_horizontal_8_neon| diff --git a/vpx_dsp/arm/loopfilter_8_neon.c b/vpx_dsp/arm/loopfilter_8_neon.c index 3c005700f..ec3757380 100644 --- a/vpx_dsp/arm/loopfilter_8_neon.c +++ b/vpx_dsp/arm/loopfilter_8_neon.c @@ -268,23 +268,19 @@ void vpx_lpf_horizontal_8_neon( int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s, *psrc; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; uint8x8_t d16u8, d17u8, d18u8; - if (count == 0) // end_vpx_mblf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); psrc = src - (pitch << 2); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = psrc + i * 8; d3u8 = vld1_u8(s); diff --git 
a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c index b01944ebb..aa31f2935 100644 --- a/vpx_dsp/arm/loopfilter_neon.c +++ b/vpx_dsp/arm/loopfilter_neon.c @@ -33,8 +33,8 @@ void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 1604fdbcf..e8092d912 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -188,13 +188,12 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, } void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { + const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; @@ -211,8 +210,8 @@ void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, diff --git a/vpx_dsp/mips/loopfilter_8_msa.c b/vpx_dsp/mips/loopfilter_8_msa.c index ec3f5dd22..5b22bd002 100644 --- a/vpx_dsp/mips/loopfilter_8_msa.c +++ b/vpx_dsp/mips/loopfilter_8_msa.c @@ -13,8 +13,7 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; @@ -23,8 +22,6 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r; v16i8 zero = { 0 }; - (void)count; - /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index 9924982f1..8a24372cb 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -323,8 +323,8 @@ void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1); - 
vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, diff --git a/vpx_dsp/mips/loopfilter_mb_dspr2.c b/vpx_dsp/mips/loopfilter_mb_dspr2.c index 5bbf091c8..dd0545eed 100644 --- a/vpx_dsp/mips/loopfilter_mb_dspr2.c +++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c @@ -23,8 +23,7 @@ void vpx_lpf_horizontal_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint32_t mask; uint32_t hev, flat; uint8_t i; diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index eeb03b671..3f63a5f62 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -552,7 +552,7 @@ add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/; $vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon; -add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/; add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c index 086d075fa..e1236dc4d 100644 --- a/vpx_dsp/x86/loopfilter_sse2.c +++ b/vpx_dsp/x86/loopfilter_sse2.c @@ -730,7 +730,7 @@ void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, const unsigned char *_blimit, const 
unsigned char *_limit, - const unsigned char *_thresh, int count) { + const unsigned char *_thresh) { DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); @@ -745,8 +745,6 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, __m128i p3, p2, p1, p0, q0, q1, q2, q3; __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; - (void)count; - q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)), _mm_loadl_epi64((__m128i *)(s + 3 * p))); q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), @@ -1504,7 +1502,7 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, transpose(src, p, dst, 8, 1); // Loop filtering - vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1); + vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); src[0] = t_dst; dst[0] = s - 4; From b1e97c6a25d53fb2e62e2fb857fbf146bb19cbd3 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:26:54 -0800 Subject: [PATCH 08/16] vpx_lpf_horizontal_4: remove unused count param Change-Id: Iec7d8eda343991f7d7d46931dca17af23c821d11 --- test/lpf_8_test.cc | 13 ++++++++----- vp10/common/loopfilter.c | 16 ++++++++-------- vp9/common/vp9_loopfilter.c | 16 ++++++++-------- vpx_dsp/arm/loopfilter_4_neon.asm | 16 +--------------- vpx_dsp/arm/loopfilter_4_neon.c | 8 ++------ vpx_dsp/loopfilter.c | 8 ++++---- vpx_dsp/mips/loopfilter_4_msa.c | 5 +---- vpx_dsp/mips/loopfilter_filters_dspr2.c | 7 +++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/loopfilter_mmx.asm | 12 ++---------- 10 files changed, 38 insertions(+), 65 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 394360e2f..e6fe1e508 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -458,7 +458,8 @@ using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( MMX, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1), + 
make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1))); #endif // HAVE_MMX @@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_lpf_horizontal_4_neon, - &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1))); INSTANTIATE_TEST_CASE_P( @@ -633,7 +634,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( DSPR2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_dspr2, @@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 6bbf191ac..1f7ce981f 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { @@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { @@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) 
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4_int & 1) { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } s += 8 * count; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index d5431c2c2..7cc833e19 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { @@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { @@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, 
pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4_int & 1) { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } s += 8 * count; diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm index d794f552a..937115898 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.asm +++ b/vpx_dsp/arm/loopfilter_4_neon.asm @@ -16,37 +16,28 @@ ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. -; TODO(fgalligan): See about removing the count code as this function is only -; called with a count of 1. ; ; void vpx_lpf_horizontal_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -; sp+4 int count |vpx_lpf_horizontal_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit - ldr r12, [sp, #8] ; load count ldr r2, [sp, #4] ; load thresh add r1, r1, r1 ; double pitch - cmp r12, #0 - beq end_vpx_lf_h_edge - vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh -count_lf_h_loop sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines add r3, r2, r1, lsr #1 ; set to 3 lines down @@ -69,11 +60,6 @@ count_lf_h_loop vst1.u8 {d6}, [r2@64], r1 ; store oq0 vst1.u8 {d7}, [r3@64], r1 ; store oq1 - add r0, r0, #8 - subs r12, r12, #1 - bne count_lf_h_loop - -end_vpx_lf_h_edge pop {pc} ENDP ; |vpx_lpf_horizontal_4_neon| diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c index db9ea6a9d..7f3ee70b9 100644 --- a/vpx_dsp/arm/loopfilter_4_neon.c +++ b/vpx_dsp/arm/loopfilter_4_neon.c @@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon( int pitch, const 
uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { int i; uint8_t *s, *psrc; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - if (count == 0) // end_vpx_lf_h_edge - return; - dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); psrc = src - (pitch << 2); - for (i = 0; i < count; i++) { + for (i = 0; i < 1; i++) { s = psrc + i * 8; d3u8 = vld1_u8(s); diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index e8092d912..e545d36ab 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -119,12 +119,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int count) { + const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; const int8_t mask = filter_mask(*limit, *blimit, @@ -138,8 +138,8 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c index ebeaddd21..936347031 100644 --- a/vpx_dsp/mips/loopfilter_4_msa.c +++ b/vpx_dsp/mips/loopfilter_4_msa.c @@ -13,14 +13,11 @@ void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { + const uint8_t *thresh_ptr) { uint64_t p1_d, p0_d, q0_d, q1_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out; - (void)count; - /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c index 8a24372cb..8414b9ed5 100644 --- a/vpx_dsp/mips/loopfilter_filters_dspr2.c +++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c @@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, - int count) { + const uint8_t *thresh) { uint8_t i; uint32_t mask; uint32_t hev; @@ -312,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */, 
const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1); - vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); + vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 3f63a5f62..36c89db8f 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -559,7 +559,7 @@ add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/; $vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon; -add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/; add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm index dee565ce0..15105e3ed 100644 --- a/vpx_dsp/x86/loopfilter_mmx.asm +++ b/vpx_dsp/x86/loopfilter_mmx.asm @@ -18,14 +18,13 @@ ; int src_pixel_step, ; const char *blimit, ; const char *limit, -; const char *thresh, -; int count +; const char *thresh ;) global sym(vpx_lpf_horizontal_4_mmx) PRIVATE sym(vpx_lpf_horizontal_4_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi @@ -39,8 +38,6 @@ sym(vpx_lpf_horizontal_4_mmx): mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; 
destination pitch? - movsxd rcx, dword ptr arg(5) ;count -.next8_h: mov rdx, arg(3) ;limit movq mm7, [rdx] mov rdi, rsi ; rdi points to row +1 for indirect addressing @@ -208,11 +205,6 @@ sym(vpx_lpf_horizontal_4_mmx): pxor mm7, [GLOBAL(t80)] ; unoffset movq [rdi], mm7 ; write back - add rsi,8 - neg rax - dec rcx - jnz .next8_h - add rsp, 32 pop rsp ; begin epilog From 72a9f06ac2f276d55fc1111012a9b35f6e30b711 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:49:56 -0800 Subject: [PATCH 09/16] vpx_highbd_lpf_vertical_8: remove unused count param Change-Id: Id16f7259897654831d31642c2d5e0bbe5e13416c --- test/lpf_8_test.cc | 12 ++++++------ vp10/common/loopfilter.c | 6 +++--- vp9/common/vp9_loopfilter.c | 6 +++--- vpx_dsp/loopfilter.c | 8 ++++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/highbd_loopfilter_sse2.c | 3 +-- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index e6fe1e508..c3b2c7807 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -479,8 +479,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 8, 2), - make_tuple(&vpx_highbd_lpf_vertical_8_sse2, - &vpx_highbd_lpf_vertical_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, @@ -493,8 +493,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_16_c, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 10, 2), - make_tuple(&vpx_highbd_lpf_vertical_8_sse2, - &vpx_highbd_lpf_vertical_8_c, 10, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, &wrapper_nc, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, @@ -507,8 +507,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_16_c, 12, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, 
&vpx_highbd_lpf_horizontal_16_c, 12, 2), - make_tuple(&vpx_highbd_lpf_vertical_8_sse2, - &vpx_highbd_lpf_vertical_8_c, 12, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 1f7ce981f..57480bd4f 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -444,10 +444,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr, bd); } else if (mask_8x8_0 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + lfi0->hev_thr, bd); } else { vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + lfi1->lim, lfi1->hev_thr, bd); } } @@ -1161,7 +1161,7 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, lfi->hev_thr, bd); } else if (mask_8x8 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } else if (mask_4x4 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 7cc833e19..9bf453e37 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -444,10 +444,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr, bd); } else if (mask_8x8_0 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + lfi0->hev_thr, bd); } else { vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + lfi1->lim, lfi1->hev_thr, bd); } } @@ -1136,7 +1136,7 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, lfi->hev_thr, bd); } else if (mask_8x8 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } else if (mask_4x4 & 1) { 
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1, bd); diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index e545d36ab..2a22f7ee7 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -565,10 +565,10 @@ void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { int i; - for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, @@ -591,9 +591,9 @@ void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1, bd); + thresh1, bd); } static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 36c89db8f..52cd6a889 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -572,7 +572,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/; - add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_vertical_8 sse2/; add_proto 
qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index c4fd5e1a0..9fc1f5b9d 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -1112,11 +1112,10 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; - (void)count; // Transpose 8x8 src[0] = s - 4; From 3c1019e49df424ac0c012a409153cb2bc551a50d Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:50:42 -0800 Subject: [PATCH 10/16] vpx_highbd_lpf_vertical_4: remove unused count param Change-Id: Ic6da723c5cf3cd8127db1f476c3e46ea134cb774 --- test/lpf_8_test.cc | 12 ++++++------ vp10/common/loopfilter.c | 12 ++++++------ vp9/common/vp9_loopfilter.c | 12 ++++++------ vpx_dsp/loopfilter.c | 8 ++++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/highbd_loopfilter_sse2.c | 3 +-- 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index c3b2c7807..5d8eb1f1f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -471,8 +471,8 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 8, 1), - make_tuple(&vpx_highbd_lpf_vertical_4_sse2, - &vpx_highbd_lpf_vertical_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, &vpx_highbd_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, @@ -485,8 +485,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 10, 1), - 
make_tuple(&vpx_highbd_lpf_vertical_4_sse2, - &vpx_highbd_lpf_vertical_4_c, 10, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, &vpx_highbd_lpf_horizontal_8_c, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, @@ -499,8 +499,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 12, 1), - make_tuple(&vpx_highbd_lpf_vertical_4_sse2, - &vpx_highbd_lpf_vertical_4_c, 12, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 12, 1), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, &vpx_highbd_lpf_horizontal_8_c, 12, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 57480bd4f..453b3319e 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -458,10 +458,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr, bd); } else if (mask_4x4_0 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + lfi0->hev_thr, bd); } else { vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + lfi1->lim, lfi1->hev_thr, bd); } } @@ -472,10 +472,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr, bd); } else if (mask_4x4_int_0 & 1) { vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + lfi0->hev_thr, bd); } else { vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + lfi1->lim, lfi1->hev_thr, bd); } } } @@ -1164,12 +1164,12 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, lfi->hev_thr, bd); } else if (mask_4x4 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } if (mask_4x4_int & 1) vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); 
+ lfi->hev_thr, bd); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 9bf453e37..9dbec0959 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -458,10 +458,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr, bd); } else if (mask_4x4_0 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + lfi0->hev_thr, bd); } else { vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + lfi1->lim, lfi1->hev_thr, bd); } } @@ -472,10 +472,10 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, lfi1->hev_thr, bd); } else if (mask_4x4_int_0 & 1) { vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + lfi0->hev_thr, bd); } else { vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + lfi1->lim, lfi1->hev_thr, bd); } } } @@ -1139,12 +1139,12 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, lfi->hev_thr, bd); } else if (mask_4x4 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } if (mask_4x4_int & 1) vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 2a22f7ee7..740d74757 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -480,12 +480,12 @@ void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, @@ -503,9 +503,9 @@ void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd); + vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, 1, bd); + thresh1, bd); } static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 52cd6a889..c2b042271 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -578,7 +578,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/; - add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_vertical_4 sse2/; add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index 9fc1f5b9d..a48bbd316 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ 
b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -1058,11 +1058,10 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; - (void)count; // Transpose 8x8 src[0] = s - 4; From 51718573295eaf556c9b1d2dab8036837d8adfe7 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:54:16 -0800 Subject: [PATCH 11/16] vpx_highbd_lpf_horizontal_8: remove unused count param Change-Id: Iaca71ea3796115d4c2d43563b4e6f3914e21f1bf --- test/lpf_8_test.cc | 12 ++++++------ vp10/common/loopfilter.c | 2 +- vp9/common/vp9_loopfilter.c | 2 +- vpx_dsp/loopfilter.c | 8 ++++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/highbd_loopfilter_sse2.c | 12 ++++-------- 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 5d8eb1f1f..f52d8e8e0 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -473,8 +473,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_4_c, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, - &vpx_highbd_lpf_horizontal_8_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, @@ -487,8 +487,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_4_c, 10, 1), make_tuple(&wrapper_nc, &wrapper_nc, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, - &vpx_highbd_lpf_horizontal_8_c, 10, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 10, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, @@ -501,8 +501,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_highbd_lpf_horizontal_4_c, 12, 1), make_tuple(&wrapper_nc, &wrapper_nc, 12, 1), - 
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, - &vpx_highbd_lpf_horizontal_8_c, 12, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 12, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 12, 1), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 453b3319e..62d34ff26 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -642,7 +642,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, count = 2; } else { vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 9dbec0959..d4574e5b5 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -642,7 +642,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, count = 2; } else { vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 740d74757..8b740f557 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -531,12 +531,12 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { + int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; @@ -559,8 +559,8 @@ void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd); - vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); + vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index c2b042271..1e7800ad7 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -587,7 +587,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index a48bbd316..53786de83 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -523,7 
+523,7 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, - int count, int bd) { + int bd) { DECLARE_ALIGNED(16, uint16_t, flat_op2[16]); DECLARE_ALIGNED(16, uint16_t, flat_op1[16]); DECLARE_ALIGNED(16, uint16_t, flat_op0[16]); @@ -556,8 +556,6 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, __m128i work_a; __m128i filter1, filter2; - (void)count; - if (bd == 8) { blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); @@ -764,9 +762,8 @@ void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p, const uint8_t *_limit1, const uint8_t *_thresh1, int bd) { - vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, - 1, bd); + vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd); + vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd); } void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, @@ -1123,8 +1120,7 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 1); // Loop filtering - vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, - bd); + vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; dst[0] = s - 4; From e7a23d703bc8f62fb387d71c0f70121253dede30 Mon Sep 17 00:00:00 2001 From: James Zern Date: Thu, 11 Feb 2016 20:59:39 -0800 Subject: [PATCH 12/16] vpx_highbd_lpf_horizontal_4: remove unused count param Change-Id: I655a771e1b1a8753be5669ef9348a312ba6cfdbc --- test/lpf_8_test.cc | 12 ++++++------ vp10/common/loopfilter.c | 16 ++++++++-------- vp9/common/vp9_loopfilter.c | 16 ++++++++-------- vpx_dsp/loopfilter.c | 8 ++++---- vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- vpx_dsp/x86/highbd_loopfilter_sse2.c | 12 ++++-------- 6 files changed, 
31 insertions(+), 35 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index f52d8e8e0..0d898bc8f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -469,8 +469,8 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, - &vpx_highbd_lpf_horizontal_4_c, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -483,8 +483,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, - &vpx_highbd_lpf_horizontal_4_c, 10, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, @@ -497,8 +497,8 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, &wrapper_nc, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, - &vpx_highbd_lpf_horizontal_4_c, 12, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 62d34ff26..a659aaea4 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -633,10 +633,10 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; @@ -646,7 +646,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4 & 1) { @@ -665,25 
+665,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } s += 8 * count; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index d4574e5b5..18420eff8 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -633,10 +633,10 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; @@ -646,7 +646,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4 & 1) { @@ -665,25 +665,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if 
(mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } s += 8 * count; diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 8b740f557..0ca1d9d8c 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -445,12 +445,12 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int count, int bd) { + const uint8_t *thresh, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
- for (i = 0; i < 8 * count; ++i) { + for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4 * p]; const uint16_t p2 = s[-3 * p]; const uint16_t p1 = s[-2 * p]; @@ -474,8 +474,8 @@ void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd); - vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd); + vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 1e7800ad7..17f11eb89 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -593,7 +593,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; + add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_horizontal_4 sse2/; add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index 53786de83..73deb733f 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ 
b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -770,7 +770,7 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, const uint8_t *_blimit, const uint8_t *_limit, const uint8_t *_thresh, - int count, int bd) { + int bd) { const __m128i zero = _mm_set1_epi16(0); __m128i blimit, limit, thresh; __m128i mask, hev, flat; @@ -810,8 +810,6 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, __m128i work_a; __m128i filter1, filter2; - (void)count; - if (bd == 8) { blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); @@ -941,9 +939,8 @@ void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p, const uint8_t *_limit1, const uint8_t *_thresh1, int bd) { - vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd); - vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1, - bd); + vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd); + vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd); } static INLINE void highbd_transpose(uint16_t *src[], int in_p, @@ -1067,8 +1064,7 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 1); // Loop filtering - vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1, - bd); + vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; dst[0] = s - 4; From 1b519fb666e79f25d93c78d3b90e92e057caa997 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 12 Feb 2016 17:42:34 -0800 Subject: [PATCH 13/16] split vpx_lpf_horizontal_16 in two replace with vpx_lpf_horizontal_edge_16 and vpx_lpf_horizontal_edge_8 to avoid passing a count parameter Change-Id: I848c95c02a3c6ebaa6c2bdf0983dce05cd645271 --- test/lpf_8_test.cc | 35 ++++++++++-------- vp10/common/loopfilter.c | 8 ++-- vp9/common/vp9_loopfilter.c | 8 ++-- vpx_dsp/arm/loopfilter_mb_neon.asm | 47 
+++++++++++++++++++----- vpx_dsp/loopfilter.c | 16 ++++++-- vpx_dsp/mips/loopfilter_16_msa.c | 24 +++++++++--- vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c | 26 ++++++++++--- vpx_dsp/vpx_dsp_rtcd_defs.pl | 10 +++-- vpx_dsp/x86/loopfilter_avx2.c | 23 ++++-------- vpx_dsp/x86/loopfilter_sse2.c | 34 +++++------------ 10 files changed, 143 insertions(+), 88 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 0d898bc8f..9ddbf71cb 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -523,8 +523,10 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -538,9 +540,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( AVX2, Loop8Test6Param, ::testing::Values( - make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, - 2))); + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1))); #endif #if HAVE_SSE2 @@ -597,10 +600,10 @@ INSTANTIATE_TEST_CASE_P( #if HAVE_NEON_ASM // Using #if inside the macro is unsupported on MSVS but the tests are not // currently built for MSVS with ARM and NEON. 
- make_tuple(&vpx_lpf_horizontal_16_neon, - &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_neon, - &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -638,10 +641,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_dspr2, - &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_dspr2, - &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -672,8 +675,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index a659aaea4..3fe316db8 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -512,12 +512,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); + vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); count = 2; } else { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 18420eff8..aae0a33fa 100644 --- 
a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -512,12 +512,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); + vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); count = 2; } else { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { diff --git a/vpx_dsp/arm/loopfilter_mb_neon.asm b/vpx_dsp/arm/loopfilter_mb_neon.asm index 20d9cfb11..d5da7a840 100644 --- a/vpx_dsp/arm/loopfilter_mb_neon.asm +++ b/vpx_dsp/arm/loopfilter_mb_neon.asm @@ -8,27 +8,28 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vpx_lpf_horizontal_16_neon| + EXPORT |vpx_lpf_horizontal_edge_8_neon| + EXPORT |vpx_lpf_horizontal_edge_16_neon| EXPORT |vpx_lpf_vertical_16_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh -; int count) +; void mb_lpf_horizontal_edge(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -|vpx_lpf_horizontal_16_neon| PROC +; r12 int count +|mb_lpf_horizontal_edge| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh - ldr r12, [sp, #92] ; load count h_count vld1.8 {d16[]}, [r2] ; load *blimit @@ -115,7 +116,35 @@ h_next vpop {d8-d15} pop {r4-r8, pc} - ENDP ; |vpx_lpf_horizontal_16_neon| + ENDP ; |mb_lpf_horizontal_edge| + +; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t 
*s, +; r1 int pitch, +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh +|vpx_lpf_horizontal_edge_8_neon| PROC + mov r12, #1 + b mb_lpf_horizontal_edge + ENDP ; |vpx_lpf_horizontal_edge_8_neon| + +; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) +; r0 uint8_t *s, +; r1 int pitch, +; r2 const uint8_t *blimit, +; r3 const uint8_t *limit, +; sp const uint8_t *thresh +|vpx_lpf_horizontal_edge_16_neon| PROC + mov r12, #2 + b mb_lpf_horizontal_edge + ENDP ; |vpx_lpf_horizontal_edge_16_neon| ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, ; const uint8_t *blimit, diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index 0ca1d9d8c..f866a3dcf 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -289,9 +289,9 @@ static INLINE void filter16(int8_t mask, uint8_t thresh, } } -void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count) { +static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int count) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -315,6 +315,16 @@ void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, } } +void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); +} + +void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { + mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); +} + static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, diff --git a/vpx_dsp/mips/loopfilter_16_msa.c b/vpx_dsp/mips/loopfilter_16_msa.c index b7c9f7bd0..a6c581d72 100644 --- 
a/vpx_dsp/mips/loopfilter_16_msa.c +++ b/vpx_dsp/mips/loopfilter_16_msa.c @@ -423,11 +423,11 @@ void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, } } -void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, - const uint8_t *b_limit_ptr, - const uint8_t *limit_ptr, - const uint8_t *thresh_ptr, - int32_t count) { +static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr, + int32_t count) { if (1 == count) { uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; uint64_t dword0, dword1; @@ -648,6 +648,20 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, } } +void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1); +} + +void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch, + const uint8_t *b_limit_ptr, + const uint8_t *limit_ptr, + const uint8_t *thresh_ptr) { + mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2); +} + static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch, uint8_t *output, int32_t out_pitch) { v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org; diff --git a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c index 8a4865073..85e167ca0 100644 --- a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c +++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c @@ -19,12 +19,12 @@ #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 -void vpx_lpf_horizontal_16_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +static void mb_lpf_horizontal_edge(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint32_t mask; uint32_t hev, flat, flat2; uint8_t i; @@ -791,4 +791,18 @@ 
void vpx_lpf_horizontal_16_dspr2(unsigned char *s, s = s + 4; } } + +void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1); +} + +void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2); +} #endif // #if HAVE_DSPR2 diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 17f11eb89..557b4c55f 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -548,9 +548,13 @@ specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/; add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/; -add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; -specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/; -$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon; +add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/; +$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon; + +add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/; +$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon; add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const 
uint8_t *thresh"; specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/; diff --git a/vpx_dsp/x86/loopfilter_avx2.c b/vpx_dsp/x86/loopfilter_avx2.c index 23a97dd05..be1087c1e 100644 --- a/vpx_dsp/x86/loopfilter_avx2.c +++ b/vpx_dsp/x86/loopfilter_avx2.c @@ -13,9 +13,10 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p, - const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); @@ -400,9 +401,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 }; -static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, - const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); @@ -975,12 +977,3 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, _mm_storeu_si128((__m128i *) (s + 6 * p), q6); } } - -void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, - const unsigned char *_blimit, const unsigned char *_limit, - const unsigned char *_thresh, int count) { - if (count == 1) - mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh); - else - mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh); -} diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c index e1236dc4d..e03508a03 100644 --- a/vpx_dsp/x86/loopfilter_sse2.c +++ 
b/vpx_dsp/x86/loopfilter_sse2.c @@ -18,11 +18,10 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) { return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); } -static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s, - int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); @@ -383,11 +382,10 @@ static INLINE __m128i filter16_mask(const __m128i *const flat, return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); } -static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, - int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); @@ -716,17 +714,6 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, } } -// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. 
-void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh, int count) { - if (count == 1) - mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh); - else - mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh); -} - void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, @@ -1554,7 +1541,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, transpose(src, p, dst, 8, 2); // Loop filtering - mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh); + vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); src[0] = t_dst; src[1] = t_dst + 8 * 8; @@ -1575,8 +1562,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); // Loop filtering - mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, - thresh); + vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); // Transpose back transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); From 9b44d9d00fcf015f9a8ab5cde7ee5d62c00a0495 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 12 Feb 2016 18:12:57 -0800 Subject: [PATCH 14/16] split vpx_highbd_lpf_horizontal_16 in two replace with vpx_highbd_lpf_horizontal_edge_16 and vpx_highbd_lpf_horizontal_edge_8 to avoid passing a count parameter Change-Id: I551f8cec0fce57032cb2652584bb802e2248644d --- test/lpf_8_test.cc | 24 +++++++-------- vp10/common/loopfilter.c | 8 ++--- vp9/common/vp9_loopfilter.c | 8 ++--- vpx_dsp/loopfilter.c | 22 ++++++++++++-- vpx_dsp/vpx_dsp_rtcd_defs.pl | 7 +++-- vpx_dsp/x86/highbd_loopfilter_sse2.c | 45 +++++++++------------------- 6 files changed, 58 insertions(+), 56 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 9ddbf71cb..13a4c476f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -475,10 +475,10 @@ 
INSTANTIATE_TEST_CASE_P( &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 8, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, &wrapper_nc, 8, 1), make_tuple(&wrapper_nc, @@ -489,10 +489,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, &wrapper_nc, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 10, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 10, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, &wrapper_nc, 10, 1), make_tuple(&wrapper_nc, @@ -503,10 +503,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, &wrapper_nc, 12, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 12, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 12, 2), + make_tuple(&wrapper_nc, + &wrapper_nc, 12, 1), + make_tuple(&wrapper_nc, + &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, &wrapper_nc, 12, 1), make_tuple(&wrapper_nc, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 3fe316db8..11dfe6d70 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2, bd); + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); count = 2; } else { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + 
vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index aae0a33fa..ee20cc557 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2, bd); + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); count = 2; } else { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index f866a3dcf..46ef64617 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -669,9 +669,11 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, } } -void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { +static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -703,6 +705,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, } } +void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); +} + +void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { 
+ highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); +} + static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 557b4c55f..d7835f4a7 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -588,8 +588,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/; + + add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/; add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index 73deb733f..72e42adc9 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { // TODO(debargha, peter): Break up large functions into smaller ones // in this file. 
-static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { +void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi16(1); __m128i blimit, limit, thresh; @@ -496,27 +494,12 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, _mm_store_si128((__m128i *)(s - 0 * p), q0); } -static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh, - bd); -} - -// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. -void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int count, int bd) { - if (count == 1) - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - else - highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd); +void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { + vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd); + vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd); } void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, @@ -1171,8 +1154,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 2); // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, - thresh, bd); + vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, + thresh, bd); src[0] 
= t_dst; src[1] = t_dst + 8 * 8; dst[0] = s - 8; @@ -1195,8 +1178,8 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, - thresh, bd); + vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, + thresh, bd); // Transpose back highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); From 110d3778993a4c75353b9ec4e6de19bcd0646570 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 12 Feb 2016 18:17:54 -0800 Subject: [PATCH 15/16] remove loopfilter 'count' param TODOs Change-Id: I25ce7314372ce2f521526ea7864ffc4ab62e4519 --- vp10/common/loopfilter.c | 2 -- vp9/common/vp9_loopfilter.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 11dfe6d70..8f4fc8ccd 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -324,7 +324,6 @@ static void filter_selectively_vert_row2(int subsampling_factor, const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - // TODO(yunqingwang): count in loopfilter functions should be removed. if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { @@ -422,7 +421,6 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - // TODO(yunqingwang): count in loopfilter functions should be removed. 
if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index ee20cc557..aca69bd0f 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -324,7 +324,6 @@ static void filter_selectively_vert_row2(int subsampling_factor, const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - // TODO(yunqingwang): count in loopfilter functions should be removed. if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { @@ -422,7 +421,6 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - // TODO(yunqingwang): count in loopfilter functions should be removed. if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { From 3ea537c0eeb60d33b5661e965384ca4a2ecdcded Mon Sep 17 00:00:00 2001 From: James Zern Date: Sat, 13 Feb 2016 11:05:24 -0800 Subject: [PATCH 16/16] lpf_8_test: remove unneeded function wrapper the count parameter has been removed from all loopfilter functions Change-Id: I87ba72006b59c65c46ca40bcb1c29171dfe0598a --- test/lpf_8_test.cc | 274 +++++++++++++++++++++------------------------ 1 file changed, 125 insertions(+), 149 deletions(-) diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 13a4c476f..b16f14c8e 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -37,43 +37,21 @@ const int number_of_iterations = 10000; #if CONFIG_VP9_HIGHBITDEPTH typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, - int count, int bd); + int bd); typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, 
const uint8_t *limit1, const uint8_t *thresh1, int bd); - -// wrapper for loopfilter functions without a 'count' param. -typedef void (*loop_op_nc_t)(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd); -template <loop_op_nc_t fn> -void wrapper_nc(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int /*count*/, int bd) { - fn(s, p, blimit, limit, thresh, bd); -} #else typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count); + const uint8_t *limit, const uint8_t *thresh); typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -// wrapper for loopfilter functions without a 'count' param. -typedef void (*loop_op_nc_t)(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh); -template <loop_op_nc_t fn> -void wrapper_nc(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int /*count*/) { - fn(s, p, blimit, limit, thresh); -} #endif // CONFIG_VP9_HIGHBITDEPTH -typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t; +typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t; typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t; class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { @@ -83,7 +61,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { loopfilter_op_ = GET_PARAM(0); ref_loopfilter_op_ = GET_PARAM(1); bit_depth_ = GET_PARAM(2); - count_ = GET_PARAM(3); mask_ = (1 << bit_depth_) - 1; } @@ -91,7 +68,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> { protected: int bit_depth_; - int count_; int mask_; loop_op_t loopfilter_op_; loop_op_t ref_loopfilter_op_; @@ -178,13 +154,13 @@ TEST_P(Loop8Test6Param, OperationCheck) { ref_s[j] = s[j]; } #if CONFIG_VP9_HIGHBITDEPTH - ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd); + 
ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd)); #else - ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_); + ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { @@ -250,13 +226,13 @@ TEST_P(Loop8Test6Param, ValueCheck) { ref_s[j] = s[j]; } #if CONFIG_VP9_HIGHBITDEPTH - ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd); + ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd)); #else - ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_); + ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh); ASM_REGISTER_STATE_CHECK( - loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_)); + loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { err_count += ref_s[j] != s[j]; @@ -458,10 +434,10 @@ using std::tr1::make_tuple; INSTANTIATE_TEST_CASE_P( MMX, Loop8Test6Param, ::testing::Values( - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1))); + make_tuple(&vpx_lpf_horizontal_4_mmx, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_vertical_4_mmx, + &vpx_lpf_vertical_4_c, 8))); #endif // HAVE_MMX #if HAVE_SSE2 @@ -469,70 +445,70 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 
1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 10, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 12, 1))); + make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, + &vpx_highbd_lpf_horizontal_4_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_4_sse2, + &vpx_highbd_lpf_vertical_4_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, + &vpx_highbd_lpf_horizontal_8_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, + &vpx_highbd_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, + &vpx_highbd_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_8_sse2, + &vpx_highbd_lpf_vertical_8_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_16_sse2, + &vpx_highbd_lpf_vertical_16_c, 8), + make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, + &vpx_highbd_lpf_horizontal_4_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_4_sse2, + &vpx_highbd_lpf_vertical_4_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, + &vpx_highbd_lpf_horizontal_8_c, 10), + 
make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, + &vpx_highbd_lpf_horizontal_edge_8_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, + &vpx_highbd_lpf_horizontal_edge_16_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_8_sse2, + &vpx_highbd_lpf_vertical_8_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_16_sse2, + &vpx_highbd_lpf_vertical_16_c, 10), + make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, + &vpx_highbd_lpf_horizontal_4_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_4_sse2, + &vpx_highbd_lpf_vertical_4_c, 12), + make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, + &vpx_highbd_lpf_horizontal_8_c, 12), + make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2, + &vpx_highbd_lpf_horizontal_edge_8_c, 12), + make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2, + &vpx_highbd_lpf_horizontal_edge_16_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_8_sse2, + &vpx_highbd_lpf_vertical_8_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_16_sse2, + &vpx_highbd_lpf_vertical_16_c, 12), + make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, + &vpx_highbd_lpf_vertical_16_dual_c, 8), + make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, + &vpx_highbd_lpf_vertical_16_dual_c, 10), + make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, + &vpx_highbd_lpf_vertical_16_dual_c, 12))); #else INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1))); + make_tuple(&vpx_lpf_horizontal_8_sse2, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_8_sse2, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_sse2, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_8_sse2, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_vertical_16_sse2, + 
&vpx_lpf_vertical_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_dual_sse2, + &vpx_lpf_vertical_16_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif @@ -540,10 +516,10 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( AVX2, Loop8Test6Param, ::testing::Values( - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1))); + make_tuple(&vpx_lpf_horizontal_edge_8_avx2, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_avx2, + &vpx_lpf_horizontal_edge_16_c, 8))); #endif #if HAVE_SSE2 @@ -600,23 +576,23 @@ INSTANTIATE_TEST_CASE_P( #if HAVE_NEON_ASM // Using #if inside the macro is unsupported on MSVS but the tests are not // currently built for MSVS with ARM and NEON. - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), + make_tuple(&vpx_lpf_horizontal_edge_8_neon, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_neon, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_neon, + &vpx_lpf_vertical_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_dual_neon, + &vpx_lpf_vertical_16_dual_c, 8), #endif // HAVE_NEON_ASM - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1))); + make_tuple(&vpx_lpf_horizontal_8_neon, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_vertical_8_neon, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_horizontal_4_neon, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_vertical_4_neon, + &vpx_lpf_vertical_4_c, 8))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values( @@ -637,22 +613,22 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( DSPR2, Loop8Test6Param, ::testing::Values( - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - 
make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1))); + make_tuple(&vpx_lpf_horizontal_4_dspr2, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_horizontal_8_dspr2, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_8_dspr2, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_dspr2, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_4_dspr2, + &vpx_lpf_vertical_4_c, 8), + make_tuple(&vpx_lpf_vertical_8_dspr2, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_vertical_16_dspr2, + &vpx_lpf_vertical_16_c, 8), + make_tuple(&vpx_lpf_vertical_16_dual_dspr2, + &vpx_lpf_vertical_16_dual_c, 8))); INSTANTIATE_TEST_CASE_P( DSPR2, Loop8Test9Param, @@ -671,20 +647,20 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1), - make_tuple(&wrapper_nc, - &wrapper_nc, 8, 1))); + make_tuple(&vpx_lpf_horizontal_4_msa, + &vpx_lpf_horizontal_4_c, 8), + make_tuple(&vpx_lpf_horizontal_8_msa, + &vpx_lpf_horizontal_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_8_msa, + &vpx_lpf_horizontal_edge_8_c, 8), + make_tuple(&vpx_lpf_horizontal_edge_16_msa, + &vpx_lpf_horizontal_edge_16_c, 8), + make_tuple(&vpx_lpf_vertical_4_msa, + &vpx_lpf_vertical_4_c, 8), + make_tuple(&vpx_lpf_vertical_8_msa, + &vpx_lpf_vertical_8_c, 8), + make_tuple(&vpx_lpf_vertical_16_msa, + &vpx_lpf_vertical_16_c, 8))); INSTANTIATE_TEST_CASE_P( MSA, Loop8Test9Param,