From cc7f0c0f3e168b31d4b5e5f42e0a368fa24ff62a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandra=20H=C3=A1jkov=C3=A1?= Date: Sat, 6 May 2017 13:21:19 +0000 Subject: [PATCH] ppc: Add vpx_sad16x8/16/32_vsx Change-Id: I60619d28fffd9809f93b1af510a50e1aa02519a9 --- test/sad_test.cc | 9 ++++++ vpx_dsp/ppc/sad_vsx.c | 53 ++++++++++++++++++++++++++++++++++++ vpx_dsp/vpx_dsp.mk | 2 ++ vpx_dsp/vpx_dsp_rtcd_defs.pl | 6 ++-- 4 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 vpx_dsp/ppc/sad_vsx.c diff --git a/test/sad_test.cc b/test/sad_test.cc index 837b08fbd..23991f56f 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc @@ -920,4 +920,13 @@ const SadMxNx4Param x4d_msa_tests[] = { INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests)); #endif // HAVE_MSA +//------------------------------------------------------------------------------ +// VSX functions +#if HAVE_VSX +const SadMxNParam vsx_tests[] = { + SadMxNParam(16, 32, &vpx_sad16x32_vsx), + SadMxNParam(16, 16, &vpx_sad16x16_vsx), SadMxNParam(16, 8, &vpx_sad16x8_vsx), +}; +INSTANTIATE_TEST_CASE_P(VSX, SADTest, ::testing::ValuesIn(vsx_tests)); +#endif // HAVE_VSX } // namespace diff --git a/vpx_dsp/ppc/sad_vsx.c b/vpx_dsp/ppc/sad_vsx.c new file mode 100644 index 000000000..fb6c7d5d5 --- /dev/null +++ b/vpx_dsp/ppc/sad_vsx.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdlib.h> + +#include "vpx_dsp/ppc/types_vsx.h" + +#include "vpx/vpx_integer.h" + +#define PROCESS16(offset) \ + v_a = vec_vsx_ld(offset, a); \ + v_b = vec_vsx_ld(offset, b); \ + v_ah = unpack_to_s16_h(v_a); \ + v_al = unpack_to_s16_l(v_a); \ + v_bh = unpack_to_s16_h(v_b); \ + v_bl = unpack_to_s16_l(v_b); \ + v_subh = vec_sub(v_ah, v_bh); \ + v_subl = vec_sub(v_al, v_bl); \ + v_absh = vec_abs(v_subh); \ + v_absl = vec_abs(v_subl); \ + v_sad = vec_sum4s(v_absh, v_sad); \ + v_sad = vec_sum4s(v_absl, v_sad); + +#define SAD16(height) \ + unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride) { \ + int y; \ + unsigned int sad[4]; \ + uint8x16_t v_a, v_b; \ + int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl; \ + int32x4_t v_sad = vec_splat_s32(0); \ + \ + for (y = 0; y < height; y++) { \ + PROCESS16(0); \ + \ + a += a_stride; \ + b += b_stride; \ + } \ + vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ + \ + return sad[3] + sad[2] + sad[1] + sad[0]; \ + } + +SAD16(8); +SAD16(16); +SAD16(32); diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 6daa58390..976db2b65 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -311,6 +311,8 @@ DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/subtract_sse2.asm +DSP_SRCS-$(HAVE_VSX) += ppc/sad_vsx.c + ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 629690b05..2a8796137 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -767,13 +767,13 @@ add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride specialize qw/vpx_sad32x16 avx2 msa sse2/; add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize 
qw/vpx_sad16x32 msa sse2/; +specialize qw/vpx_sad16x32 msa sse2 vsx/; add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x16 neon msa sse2/; +specialize qw/vpx_sad16x16 neon msa sse2 vsx/; add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x8 neon msa sse2/; +specialize qw/vpx_sad16x8 neon msa sse2 vsx/; add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_sad8x16 neon msa sse2/;