From f695b30ac2f8bac8af381c0436e88086fd4c7112 Mon Sep 17 00:00:00 2001 From: Johann Date: Wed, 3 May 2017 14:58:52 -0700 Subject: [PATCH] comp_avg_pred neon: used by sub pixel avg variance BUG=webm:1423 Change-Id: I33de537f238f58f89b7a6c1c2d6e8110de4b8804 --- test/comp_avg_pred_test.cc | 6 ++++ vpx_dsp/arm/avg_pred_neon.c | 55 ++++++++++++++++++++++++++++++++++++ vpx_dsp/vpx_dsp.mk | 1 + vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 +- 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 vpx_dsp/arm/avg_pred_neon.c diff --git a/test/comp_avg_pred_test.cc b/test/comp_avg_pred_test.cc index 3feba7127..dce673eec 100644 --- a/test/comp_avg_pred_test.cc +++ b/test/comp_avg_pred_test.cc @@ -156,6 +156,12 @@ INSTANTIATE_TEST_CASE_P(C, AvgPredTest, INSTANTIATE_TEST_CASE_P(SSE2, AvgPredTest, ::testing::Values(&vpx_comp_avg_pred_sse2)); #endif // HAVE_SSE2 + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P(NEON, AvgPredTest, + ::testing::Values(&vpx_comp_avg_pred_neon)); +#endif // HAVE_NEON + #if HAVE_VSX INSTANTIATE_TEST_CASE_P(VSX, AvgPredTest, ::testing::Values(&vpx_comp_avg_pred_vsx)); diff --git a/vpx_dsp/arm/avg_pred_neon.c b/vpx_dsp/arm/avg_pred_neon.c new file mode 100644 index 000000000..1370ec2d2 --- /dev/null +++ b/vpx_dsp/arm/avg_pred_neon.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/mem_neon.h" + +void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + if (width > 8) { + int x, y; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; x += 16) { + const uint8x16_t p = vld1q_u8(pred + x); + const uint8x16_t r = vld1q_u8(ref + x); + const uint8x16_t avg = vrhaddq_u8(p, r); + vst1q_u8(comp + x, avg); + } + comp += width; + pred += width; + ref += ref_stride; + } + } else { + int i; + for (i = 0; i < width * height; i += 16) { + const uint8x16_t p = vld1q_u8(pred); + uint8x16_t r; + + if (width == 4) { + r = load_unaligned_u8q(ref, ref_stride); + ref += 4 * ref_stride; + } else { + const uint8x8_t r_0 = vld1_u8(ref); + const uint8x8_t r_1 = vld1_u8(ref + ref_stride); + assert(width == 8); + r = vcombine_u8(r_0, r_1); + ref += 2 * ref_stride; + } + r = vrhaddq_u8(r, p); + vst1q_u8(comp, r); + + pred += 16; + comp += 16; + } + } +} diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 6ac7182ab..da057c883 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -325,6 +325,7 @@ ifneq ($(filter yes,$(CONFIG_ENCODERS) $(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC) DSP_SRCS-yes += variance.c DSP_SRCS-yes += variance.h +DSP_SRCS-$(HAVE_NEON) += arm/avg_pred_neon.c DSP_SRCS-$(HAVE_NEON) += arm/subpel_variance_neon.c DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index c67483641..62e190a1a 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1175,7 +1175,7 @@ add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int specialize qw/vpx_get4x4sse_cs neon msa vsx/; add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; - specialize qw/vpx_comp_avg_pred sse2 vsx/; + specialize qw/vpx_comp_avg_pred
neon sse2 vsx/; # # Subpixel Variance