vpx/vp8/common/reconintra4x4.c
Han Shen b72d3e8a25 Earmark extra space for VSX.
The backend-specific optimization for PPC VSX reads 16 bytes, whereas ARM NEON /
SSE2 only read <= 8 bytes. Although the extra bytes read are never actually
used, that does not justify reading past the end of the buffer. Fixed by
allocating more space when building for VSX. This was reported by ASan.

Also note - PPC does have an instruction that loads 64 bits from memory: lxsdx
loads one 64-bit doubleword (whereas lxvd2x loads two 64-bit doublewords).
However, we only have the "vec_vsx_ld" builtin, which maps to lxvd2x; there is
no builtin for lxsdx. The only way to access lxsdx is through inline assembly,
which does not fit well with the original paradigm.

Refer:
  vsx:
    vpx_tm_predictor_4x4_vsx @ third_party/libvpx/git_root/vpx_dsp/ppc/intrapred_vsx.c
  neon:
    vpx_tm_predictor_4x4_neon @ third_party/libvpx/git_root/vpx_dsp/arm/intrapred_neon_asm.asm
  sse2:
    tm_predictor_4x4 @ third_party/libvpx/git_root/vpx_dsp/x86/intrapred_sse2.asm

BUG=b/63112600

Tested:
  asan tests passed.

Change-Id: I5f74b56e35c05b67851de8b5530aece213f2ce9d
2017-07-19 13:59:32 -07:00

76 lines
2.4 KiB
C

/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "blockd.h"
#include "reconintra4x4.h"
#include "vp8/common/common.h"
#include "vpx_ports/mem.h"
/* Common signature of the 4x4 intra predictors stored in pred[]: each takes a
   destination pointer with its stride plus read-only "above" and "left"
   neighbour arrays. */
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
/* Dispatch table indexed by B_PREDICTION_MODE; its ten entries are filled in
   by vp8_init_intra4x4_predictors_internal() and consumed by
   vp8_intra4x4_predict(). */
static intra_pred_fn pred[10];
void vp8_init_intra4x4_predictors_internal(void) {
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
pred[B_HE_PRED] = vpx_he_predictor_4x4;
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
pred[B_VL_PRED] = vpx_d63e_predictor_4x4;
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
}
/*
 * Assembles the neighbour buffers for one 4x4 block and invokes the predictor
 * registered in pred[] for b_mode.
 *
 *   above       - source of the row above the block; 8 bytes are copied
 *   yleft       - source of the left column; one byte is read from each of
 *                 4 rows
 *   left_stride - offset in bytes between consecutive yleft samples
 *   b_mode      - index into pred[] selecting the predictor
 *   dst         - destination pointer handed to the predictor
 *   dst_stride  - destination stride handed to the predictor
 *   top_left    - byte stored immediately before the copied above row
 */
void vp8_intra4x4_predict(unsigned char *above, unsigned char *yleft,
                          int left_stride, B_PREDICTION_MODE b_mode,
                          unsigned char *dst, int dst_stride,
                          unsigned char top_left) {
/* The Power PC implementation uses "vec_vsx_ld" to read 16 bytes starting at
   the above row (above_store + 4). Play it safe by reserving enough stack
   space here. Similarly for the left column below. */
#if HAVE_VSX
  unsigned char above_store[20];
#else
  unsigned char above_store[12];
#endif
  unsigned char *const above_row = above_store + 4;
  int row;
#if HAVE_NEON
  // NEON intrinsics cannot load just 32 bits (four 8-bit values); instead
  // they over-read 4 extra values that are never used.
  unsigned char left_col[8];
#if VPX_WITH_ASAN
  // The over-read bytes are unused, but zero the buffer so the sanitizer does
  // not report an 'uninitialized read'.
  vp8_zero_array(left_col, 8);
#endif  // VPX_WITH_ASAN
#elif HAVE_VSX
  unsigned char left_col[16];
#else
  unsigned char left_col[4];
#endif  // HAVE_NEON

  /* Gather the left column into contiguous storage, one sample per row. */
  for (row = 0; row < 4; ++row) {
    left_col[row] = yleft[row * left_stride];
  }

  /* Copy the above row and place top_left one byte before it, so that it is
     reachable as above_row[-1]. */
  memcpy(above_row, above, 8);
  above_row[-1] = top_left;

  pred[b_mode](dst, dst_stride, above_row, left_col);
}