From cc868da5266f164eff34d19e7431abd8056e6ea4 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sat, 8 Apr 2017 22:41:41 +0000 Subject: [PATCH] ppc: d45 predictor 32x32 About 12x faster. Change-Id: I22c150256aefb4941861ab1f6c17d554fb694bed --- test/test_intra_pred_speed.cc | 4 ++-- vpx_dsp/ppc/intrapred_vsx.c | 23 +++++++++++++++++++++++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index a97f90a1f..0e89dfb0e 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -325,8 +325,8 @@ INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx, INTRA_PRED_TEST(VSX, TestIntraPred32, vpx_dc_predictor_32x32_vsx, vpx_dc_left_predictor_32x32_vsx, vpx_dc_top_predictor_32x32_vsx, vpx_dc_128_predictor_32x32_vsx, vpx_v_predictor_32x32_vsx, - vpx_h_predictor_32x32_vsx, NULL, NULL, NULL, NULL, NULL, NULL, - vpx_tm_predictor_32x32_vsx) + vpx_h_predictor_32x32_vsx, vpx_d45_predictor_32x32_vsx, NULL, + NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_vsx) #endif // HAVE_VSX // ----------------------------------------------------------------------------- diff --git a/vpx_dsp/ppc/intrapred_vsx.c b/vpx_dsp/ppc/intrapred_vsx.c index 6c7d7f94d..dba062762 100644 --- a/vpx_dsp/ppc/intrapred_vsx.c +++ b/vpx_dsp/ppc/intrapred_vsx.c @@ -518,3 +518,26 @@ void vpx_d45_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, row = vec_perm(row, above_right, sl1); } } + +void vpx_d45_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + const uint8x16_t a0 = vec_vsx_ld(0, above); + const uint8x16_t a1 = vec_vsx_ld(16, above); + const uint8x16_t above_right = vec_splat(a1, 15); + const uint8x16_t b0 = vec_perm(a0, a1, sl1); + const uint8x16_t b1 = vec_perm(a1, above_right, sl1); + const uint8x16_t c0 = vec_perm(b0, b1, sl1); + const uint8x16_t c1 = vec_perm(b1, above_right, sl1); + uint8x16_t row0 = avg3(a0, b0, c0); + uint8x16_t row1 = avg3(a1, b1, c1); + int i; + (void)left; + + for (i = 0; i < 32; i++) { + vec_vsx_st(row0, 0, dst); + vec_vsx_st(row1, 16, dst); + dst += stride; + row0 = vec_perm(row0, row1, sl1); + row1 = vec_perm(row1, above_right, sl1); + } +} diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 707c1c8c2..ac08d0d9c 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -151,7 +151,7 @@ add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, specialize qw/vpx_d207_predictor_32x32 ssse3/; add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_32x32 neon ssse3/; +specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/; add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63_predictor_32x32 ssse3/;