vp8: change build_intra_predictors_mbuv_s to use vpx_dsp.

Change-Id: I936c2430c3c5b1e0ab5dec0a20110525e925b5e4
This commit is contained in:
Ronald S. Bultje
2015-09-30 11:46:35 -04:00
parent 54d48955f6
commit 7cdcfee82c
10 changed files with 55 additions and 1372 deletions

View File

@@ -1,317 +0,0 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"
namespace {
using libvpx_test::ACMRandom;
class IntraPredBase {
public:
virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); }
protected:
void SetupMacroblock(MACROBLOCKD *mbptr,
MODE_INFO *miptr,
uint8_t *data,
int block_size,
int stride,
int num_planes) {
mbptr_ = mbptr;
miptr_ = miptr;
mbptr_->up_available = 1;
mbptr_->left_available = 1;
mbptr_->mode_info_context = miptr_;
stride_ = stride;
block_size_ = block_size;
num_planes_ = num_planes;
for (int p = 0; p < num_planes; p++)
data_ptr_[p] = data + stride * (block_size + 1) * p +
stride + block_size;
}
void FillRandom() {
// Fill edges with random data
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int p = 0; p < num_planes_; p++) {
for (int x = -1 ; x <= block_size_; x++)
data_ptr_[p][x - stride_] = rnd.Rand8();
for (int y = 0; y < block_size_; y++)
data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
}
}
virtual void Predict(MB_PREDICTION_MODE mode) = 0;
void SetLeftUnavailable() {
mbptr_->left_available = 0;
for (int p = 0; p < num_planes_; p++)
for (int i = -1; i < block_size_; ++i)
data_ptr_[p][stride_ * i - 1] = 129;
}
void SetTopUnavailable() {
mbptr_->up_available = 0;
for (int p = 0; p < num_planes_; p++)
memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
}
void SetTopLeftUnavailable() {
SetLeftUnavailable();
SetTopUnavailable();
}
int BlockSizeLog2Min1() const {
switch (block_size_) {
case 16:
return 3;
case 8:
return 2;
default:
return 0;
}
}
// check DC prediction output against a reference
void CheckDCPrediction() const {
for (int p = 0; p < num_planes_; p++) {
// calculate expected DC
int expected;
if (mbptr_->up_available || mbptr_->left_available) {
int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
mbptr_->left_available;
if (mbptr_->up_available)
for (int x = 0; x < block_size_; x++)
sum += data_ptr_[p][x - stride_];
if (mbptr_->left_available)
for (int y = 0; y < block_size_; y++)
sum += data_ptr_[p][y * stride_ - 1];
expected = (sum + (1 << (shift - 1))) >> shift;
} else {
expected = 0x80;
}
// check that all subsequent lines are equal to the first
for (int y = 1; y < block_size_; ++y)
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
block_size_));
// within the first line, ensure that each pixel has the same value
for (int x = 1; x < block_size_; ++x)
ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
// now ensure that that pixel has the expected (DC) value
ASSERT_EQ(expected, data_ptr_[p][0]);
}
}
// check V prediction output against a reference
void CheckVPrediction() const {
// check that all lines equal the top border
for (int p = 0; p < num_planes_; p++)
for (int y = 0; y < block_size_; y++)
ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
&data_ptr_[p][y * stride_], block_size_));
}
// check H prediction output against a reference
void CheckHPrediction() const {
// for each line, ensure that each pixel is equal to the left border
for (int p = 0; p < num_planes_; p++)
for (int y = 0; y < block_size_; y++)
for (int x = 0; x < block_size_; x++)
ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
data_ptr_[p][x + y * stride_]);
}
static int ClipByte(int value) {
if (value > 255)
return 255;
else if (value < 0)
return 0;
return value;
}
// check TM prediction output against a reference
void CheckTMPrediction() const {
for (int p = 0; p < num_planes_; p++)
for (int y = 0; y < block_size_; y++)
for (int x = 0; x < block_size_; x++) {
const int expected = ClipByte(data_ptr_[p][x - stride_]
+ data_ptr_[p][stride_ * y - 1]
- data_ptr_[p][-1 - stride_]);
ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
}
}
// Actual test
void RunTest() {
{
SCOPED_TRACE("DC_PRED");
FillRandom();
Predict(DC_PRED);
CheckDCPrediction();
}
{
SCOPED_TRACE("DC_PRED LEFT");
FillRandom();
SetLeftUnavailable();
Predict(DC_PRED);
CheckDCPrediction();
}
{
SCOPED_TRACE("DC_PRED TOP");
FillRandom();
SetTopUnavailable();
Predict(DC_PRED);
CheckDCPrediction();
}
{
SCOPED_TRACE("DC_PRED TOP_LEFT");
FillRandom();
SetTopLeftUnavailable();
Predict(DC_PRED);
CheckDCPrediction();
}
{
SCOPED_TRACE("H_PRED");
FillRandom();
Predict(H_PRED);
CheckHPrediction();
}
{
SCOPED_TRACE("V_PRED");
FillRandom();
Predict(V_PRED);
CheckVPrediction();
}
{
SCOPED_TRACE("TM_PRED");
FillRandom();
Predict(TM_PRED);
CheckTMPrediction();
}
}
MACROBLOCKD *mbptr_;
MODE_INFO *miptr_;
uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
int stride_;
int block_size_;
int num_planes_;
};
// Signature shared by every chroma (U/V) intra predictor implementation
// exercised by IntraPredUVTest. The prediction mode is not an argument; the
// fixture stores it in x->mode_info_context->mbmi.uv_mode before the call.
// The fixture passes the row above and the column to the left of each plane
// (walked with left_stride) and the destination blocks (pred_stride).
typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
uint8_t *uabove_row,
uint8_t *vabove_row,
uint8_t *uleft,
uint8_t *vleft,
int left_stride,
uint8_t *upred_ptr,
uint8_t *vpred_ptr,
int pred_stride);
// Value-parameterised fixture that runs the IntraPredBase scenarios against
// one 8x8 chroma predictor implementation (the test parameter).
//
// The macroblock descriptor, mode info and pixel buffer are allocated once
// per test case and shared by all parameterised tests.
class IntraPredUVTest
    : public IntraPredBase,
      public ::testing::TestWithParam<IntraPredUvFunc> {
 public:
  static void SetUpTestCase() {
    mb_ = reinterpret_cast<MACROBLOCKD*>(
        vpx_memalign(32, sizeof(MACROBLOCKD)));
    mi_ = reinterpret_cast<MODE_INFO*>(
        vpx_memalign(32, sizeof(MODE_INFO)));
    data_array_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

  // Releases the shared buffers and clears every pointer so that nothing is
  // left dangling once the test case has finished.
  static void TearDownTestCase() {
    vpx_free(data_array_);
    data_array_ = NULL;
    vpx_free(mi_);
    mi_ = NULL;
    vpx_free(mb_);
    mb_ = NULL;
  }

 protected:
  static const int kBlockSize = 8;
  static const int kDataAlignment = 8;
  // We use 24 so that the data pointer of the first pixel in each row of
  // each macroblock is 8-byte aligned, and this gives us access to the
  // top-left and top-right corner pixels belonging to the top-left/right
  // macroblocks.
  static const int kStride = kBlockSize * 3;
  // We use 9 lines so we have one line above us for top-prediction.
  // [0] = U, [1] = V
  static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);

  virtual void SetUp() {
    // Fail the test cleanly instead of dereferencing a null pointer if any
    // of the shared allocations did not succeed.
    ASSERT_TRUE(mb_ != NULL);
    ASSERT_TRUE(mi_ != NULL);
    ASSERT_TRUE(data_array_ != NULL);

    pred_fn_ = GetParam();
    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
  }

  // Stores `mode` as the chroma mode and invokes the predictor under test
  // with the above rows, left columns and destinations of the U and V
  // planes.
  virtual void Predict(MB_PREDICTION_MODE mode) {
    mbptr_->mode_info_context->mbmi.uv_mode = mode;
    pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
             data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
             data_ptr_[0], data_ptr_[1], kStride);
  }

  IntraPredUvFunc pred_fn_;

  // Pixel storage for both planes ([0] = U, [1] = V), kDataBufferSize bytes.
  static uint8_t* data_array_;
  static MACROBLOCKD* mb_;
  static MODE_INFO* mi_;
};

MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
MODE_INFO* IntraPredUVTest::mi_ = NULL;
uint8_t* IntraPredUVTest::data_array_ = NULL;
// Runs the full set of prediction scenarios against the predictor supplied
// as the test parameter.
TEST_P(IntraPredUVTest, IntraPredTests) {
RunTest();
}
// The C implementation is always instantiated.
INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
::testing::Values(
vp8_build_intra_predictors_mbuv_s_c));
// Each optimised variant is instantiated only when the matching HAVE_* macro
// is non-zero.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
::testing::Values(
vp8_build_intra_predictors_mbuv_s_sse2));
#endif
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
::testing::Values(
vp8_build_intra_predictors_mbuv_s_ssse3));
#endif
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
::testing::Values(
vp8_build_intra_predictors_mbuv_s_neon));
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, IntraPredUVTest,
::testing::Values(
vp8_build_intra_predictors_mbuv_s_msa));
#endif
} // namespace

View File

@@ -111,7 +111,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
LIBVPX_TEST_SRCS-yes += idct_test.cc
LIBVPX_TEST_SRCS-yes += intrapred_test.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-yes += vpx_scale_test.cc

View File

@@ -1,124 +0,0 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "vp8/common/blockd.h"
/* Builds the 8x8 U and V intra predictors of one macroblock using NEON.
 *
 * x            macroblock descriptor; supplies the chroma prediction mode
 *              (mode_info_context->mbmi.uv_mode) and the up_available /
 *              left_available flags used by DC prediction.
 * uabove_row,  row of 8 pixels directly above the U / V block;
 * vabove_row   element [-1] (the top-left pixel) is read for TM_PRED.
 * uleft, vleft first pixel of the column left of the U / V block.
 * left_stride  distance in bytes between consecutive left-column pixels.
 * upred_ptr,   destination of the predicted U / V block.
 * vpred_ptr
 * pred_stride  distance in bytes between consecutive destination rows.
 *
 * Modes other than DC_PRED, V_PRED, H_PRED and TM_PRED write nothing.
 */
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
                                            unsigned char * uabove_row,
                                            unsigned char * vabove_row,
                                            unsigned char * uleft,
                                            unsigned char * vleft,
                                            int left_stride,
                                            unsigned char * upred_ptr,
                                            unsigned char * vpred_ptr,
                                            int pred_stride) {
  const int mode = x->mode_info_context->mbmi.uv_mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      /* Number of available edges; 0 keeps the default DC value of 128. */
      int shift = x->up_available + x->left_available;
      uint8x8_t v_expected_udc = vdup_n_u8(128);
      uint8x8_t v_expected_vdc = vdup_n_u8(128);

      if (shift) {
        unsigned int average_u = 0;
        unsigned int average_v = 0;
        int expected_udc;
        int expected_vdc;

        if (x->up_available) {
          /* Sum the 8 U and the 8 V above pixels in one pass: U sits in the
           * low half of the combined vector, V in the high half, and the
           * pairwise widening adds reduce each half to one 64-bit sum. */
          const uint8x8_t v_uabove = vld1_u8(uabove_row);
          const uint8x8_t v_vabove = vld1_u8(vabove_row);
          const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove));
          const uint32x4_t b = vpaddlq_u16(a);
          const uint64x2_t c = vpaddlq_u32(b);

          /* 32-bit lanes 0 and 2 are the first halves of the two 64-bit
           * sums as laid out by the reinterpret. */
          average_u = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 0);
          average_v = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 2);
        }
        if (x->left_available) {
          for (i = 0; i < 8; ++i) {
            average_u += uleft[0];
            uleft += left_stride;
            average_v += vleft[0];
            vleft += left_stride;
          }
        }

        /* 8 pixels per edge: divide by 8 or 16 with rounding. */
        shift += 2;
        expected_udc = (average_u + (1 << (shift - 1))) >> shift;
        expected_vdc = (average_v + (1 << (shift - 1))) >> shift;
        v_expected_udc = vmov_n_u8((uint8_t)expected_udc);
        v_expected_vdc = vmov_n_u8((uint8_t)expected_vdc);
      }

      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_expected_udc);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_expected_vdc);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case V_PRED:
    {
      /* Replicate the above row into all 8 destination rows. */
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);

      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_uabove);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vabove);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case H_PRED:
    {
      /* Fill each destination row with that row's left pixel. */
      for (i = 0; i < 8; ++i) {
        const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]);
        const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]);

        uleft += left_stride;
        vleft += left_stride;
        vst1_u8(upred_ptr, v_uleft);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vleft);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* pred = clip(above + left - top_left). The pixels are unsigned, so
       * they are broadcast with unsigned casts (as in H_PRED) rather than
       * being routed through signed types first. */
      const uint16x8_t v_utop_left = vmovq_n_u16((uint16_t)uabove_row[-1]);
      const uint16x8_t v_vtop_left = vmovq_n_u16((uint16_t)vabove_row[-1]);
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);

      for (i = 0; i < 8; ++i) {
        const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]);
        const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]);
        /* above + left fits in 16 bits (at most 510). */
        const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft);
        const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft);
        /* The difference may be negative, hence the signed subtraction. */
        const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u),
                                        vreinterpretq_s16_u16(v_utop_left));
        const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v),
                                        vreinterpretq_s16_u16(v_vtop_left));
        /* Saturating narrow clamps the result to [0, 255]. */
        const uint8x8_t pred_u = vqmovun_s16(b_u);
        const uint8x8_t pred_v = vqmovun_s16(b_v);

        vst1_u8(upred_ptr, pred_u);
        vst1_u8(vpred_ptr, pred_v);
        upred_ptr += pred_stride;
        vpred_ptr += pred_stride;
        uleft += left_stride;
        vleft += left_stride;
      }
    }
    break;
  }
}

View File

@@ -1,182 +0,0 @@
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
/* Vertical 8x8 prediction: the 8 bytes loaded from src are stored into each
 * of the 8 destination rows, dst_stride bytes apart. */
static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride)
{
    const uint64_t above = LD(src);
    uint8_t *lower_half = dst + (4 * dst_stride);

    /* Rows 0-3, then rows 4-7. */
    SD4(above, above, above, above, dst, dst_stride);
    SD4(above, above, above, above, lower_half, dst_stride);
}
/* Horizontal 8x8 prediction: destination row r is filled with the pixel
 * src[r * src_stride], for r = 0..7. */
static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
                                        uint8_t *dst, int32_t dst_stride)
{
    uint64_t line[8];
    uint8_t *lower_half = dst + (4 * dst_stride);
    int32_t r;

    /* Multiplying by 0x01 repeated in every byte copies the pixel into all
     * eight bytes of the 64-bit word. */
    for (r = 0; r < 8; ++r)
    {
        line[r] = src[r * src_stride] * 0x0101010101010101ull;
    }

    SD4(line[0], line[1], line[2], line[3], dst, dst_stride);
    SD4(line[4], line[5], line[6], line[7], lower_half, dst_stride);
}
/* DC 8x8 prediction: fills the 8x8 destination with a single value derived
 * from whichever edges are flagged as present.
 *
 * src_top          row above the block (read only when is_above is set).
 * src_left         first pixel of the left column (read only when is_left
 *                  is set).
 * src_stride_left  distance in bytes between left-column pixels.
 * dst, dst_stride  destination block and its row pitch.
 * is_above/is_left non-zero when the respective edge may be used.
 */
static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
int32_t src_stride_left,
uint8_t *dst, int32_t dst_stride,
uint8_t is_above, uint8_t is_left)
{
uint32_t row, addition = 0;
uint64_t out;
v16u8 src_above, store;
v8u16 sum_above;
v4u32 sum_top;
v2u64 sum;
if (is_left && is_above)
{
/* Both edges: (sum of above + sum of left + 8) >> 4.
 * NOTE(review): LD_UB presumably loads a full 16-byte vector although only
 * the first 8 bytes belong to this block; the hadd chain looks like a
 * pairwise widening reduction whose element 0 holds the sum of those 8
 * bytes - confirm against the MSA macro/intrinsic definitions. */
src_above = LD_UB(src_top);
sum_above = __msa_hadd_u_h(src_above, src_above);
sum_top = __msa_hadd_u_w(sum_above, sum_above);
sum = __msa_hadd_u_d(sum_top, sum_top);
addition = __msa_copy_u_w((v4i32)sum, 0);
for (row = 0; row < 8; ++row)
{
addition += src_left[row * src_stride_left];
}
addition = (addition + 8) >> 4;
store = (v16u8)__msa_fill_b(addition);
}
else if (is_left)
{
/* Left edge only: (sum of the 8 left pixels + 4) >> 3. */
for (row = 0; row < 8; ++row)
{
addition += src_left[row * src_stride_left];
}
addition = (addition + 4) >> 3;
store = (v16u8)__msa_fill_b(addition);
}
else if (is_above)
{
/* Top edge only: the reduced sum is shifted right by 3 inside the vector.
 * NOTE(review): __msa_srari_d is presumably a rounding shift, matching the
 * "+ 4 >> 3" of the left-only branch - confirm. */
src_above = LD_UB(src_top);
sum_above = __msa_hadd_u_h(src_above, src_above);
sum_top = __msa_hadd_u_w(sum_above, sum_above);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum = (v2u64)__msa_srari_d((v2i64)sum, 3);
store = (v16u8)__msa_splati_b((v16i8)sum, 0);
}
else
{
/* No edge available: fixed value 128. */
store = (v16u8)__msa_ldi_b(128);
}
/* Take 8 bytes of the replicated value and write them to all 8 rows. */
out = __msa_copy_u_d((v2i64)store, 0);
SD4(out, out, out, out, dst, dst_stride);
dst += (4 * dst_stride);
SD4(out, out, out, out, dst, dst_stride);
}
/* Builds the 8x8 U and V intra predictors of one macroblock (MSA build).
 *
 * The chroma mode is read from x->mode_info_context->mbmi.uv_mode. DC, V
 * and H prediction are delegated to the 8x8 helpers, once per plane; TM
 * prediction is computed here as clip(left + above - top_left). Every
 * other mode leaves the destination untouched.
 */
void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x,
                                           unsigned char *uabove_row,
                                           unsigned char *vabove_row,
                                           unsigned char *uleft,
                                           unsigned char *vleft,
                                           int left_stride,
                                           unsigned char *upred_ptr,
                                           unsigned char *vpred_ptr,
                                           int pred_stride)
{
    uint32_t r, c;
    /* The top-left pixels are fetched up front, whatever the mode. */
    uint8_t u_corner = uabove_row[-1];
    uint8_t v_corner = vabove_row[-1];

    switch (x->mode_info_context->mbmi.uv_mode)
    {
        case DC_PRED:
            intra_predict_dc_8x8_msa(uabove_row, uleft, left_stride,
                                     upred_ptr, pred_stride,
                                     x->up_available, x->left_available);
            intra_predict_dc_8x8_msa(vabove_row, vleft, left_stride,
                                     vpred_ptr, pred_stride,
                                     x->up_available, x->left_available);
            break;

        case V_PRED:
            intra_predict_vert_8x8_msa(uabove_row, upred_ptr, pred_stride);
            intra_predict_vert_8x8_msa(vabove_row, vpred_ptr, pred_stride);
            break;

        case H_PRED:
            intra_predict_horiz_8x8_msa(uleft, left_stride, upred_ptr,
                                        pred_stride);
            intra_predict_horiz_8x8_msa(vleft, left_stride, vpred_ptr,
                                        pred_stride);
            break;

        case TM_PRED:
            for (r = 0; r < 8; ++r)
            {
                for (c = 0; c < 8; ++c)
                {
                    int u_val = uleft[r * left_stride] + uabove_row[c] -
                                u_corner;
                    int v_val = vleft[r * left_stride] + vabove_row[c] -
                                v_corner;

                    /* Clamp both results to the 8-bit pixel range. */
                    u_val = (u_val < 0) ? 0 : ((u_val > 255) ? 255 : u_val);
                    v_val = (v_val < 0) ? 0 : ((v_val > 255) ? 255 : v_val);

                    upred_ptr[c] = u_val;
                    vpred_ptr[c] = v_val;
                }

                upred_ptr += pred_stride;
                vpred_ptr += pred_stride;
            }
            break;

        /* Not chroma intra modes handled here: nothing to predict. */
        case B_PRED:
        case NEARESTMV:
        case NEARMV:
        case ZEROMV:
        case NEWMV:
        case SPLITMV:
        case MB_MODE_COUNT:
            break;
    }
}

View File

@@ -17,22 +17,32 @@
#include "blockd.h"
#include "vp8/common/reconintra.h"
enum {
SIZE_16,
SIZE_8,
NUM_SIZES,
};
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
static intra_pred_fn pred[4];
static intra_pred_fn dc_pred[2][2];
static intra_pred_fn pred[4][NUM_SIZES];
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
static void vp8_init_intra_predictors_internal(void)
{
pred[V_PRED] = vpx_v_predictor_16x16;
pred[H_PRED] = vpx_h_predictor_16x16;
pred[TM_PRED] = vpx_tm_predictor_16x16;
#define INIT_SIZE(sz) \
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
\
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
dc_pred[0][0] = vpx_dc_128_predictor_16x16;
dc_pred[0][1] = vpx_dc_top_predictor_16x16;
dc_pred[1][0] = vpx_dc_left_predictor_16x16;
dc_pred[1][1] = vpx_dc_predictor_16x16;
INIT_SIZE(16);
INIT_SIZE(8);
}
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
@@ -45,6 +55,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
unsigned char yleft_col[16];
int i;
intra_pred_fn fn;
for (i = 0; i < 16; i++)
{
@@ -53,154 +64,49 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
if (mode == DC_PRED)
{
dc_pred[x->left_available][x->up_available](ypred_ptr, y_stride,
yabove_row, yleft_col);
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
}
else
{
pred[mode](ypred_ptr, y_stride, yabove_row, yleft_col);
fn = pred[mode][SIZE_16];
}
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
}
void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride)
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride)
{
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
unsigned char uleft_col[8];
unsigned char utop_left = uabove_row[-1];
unsigned char vleft_col[8];
unsigned char vtop_left = vabove_row[-1];
int i, j;
int i;
intra_pred_fn fn;
for (i = 0; i < 8; i++)
{
uleft_col[i] = uleft [i* left_stride];
vleft_col[i] = vleft [i* left_stride];
uleft_col[i] = uleft[i * left_stride];
vleft_col[i] = vleft[i * left_stride];
}
switch (x->mode_info_context->mbmi.uv_mode)
if (uvmode == DC_PRED)
{
case DC_PRED:
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
}
else
{
int expected_udc;
int expected_vdc;
int shift;
int Uaverage = 0;
int Vaverage = 0;
if (x->up_available)
{
for (i = 0; i < 8; i++)
{
Uaverage += uabove_row[i];
Vaverage += vabove_row[i];
}
}
if (x->left_available)
{
for (i = 0; i < 8; i++)
{
Uaverage += uleft_col[i];
Vaverage += vleft_col[i];
}
}
if (!x->up_available && !x->left_available)
{
expected_udc = 128;
expected_vdc = 128;
}
else
{
shift = 2 + x->up_available + x->left_available;
expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
}
/*memset(upred_ptr,expected_udc,64);*/
/*memset(vpred_ptr,expected_vdc,64);*/
for (i = 0; i < 8; i++)
{
memset(upred_ptr, expected_udc, 8);
memset(vpred_ptr, expected_vdc, 8);
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
}
break;
case V_PRED:
{
for (i = 0; i < 8; i++)
{
memcpy(upred_ptr, uabove_row, 8);
memcpy(vpred_ptr, vabove_row, 8);
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
}
break;
case H_PRED:
{
for (i = 0; i < 8; i++)
{
memset(upred_ptr, uleft_col[i], 8);
memset(vpred_ptr, vleft_col[i], 8);
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
fn = pred[uvmode][SIZE_8];
}
break;
case TM_PRED:
{
for (i = 0; i < 8; i++)
{
for (j = 0; j < 8; j++)
{
int predu = uleft_col[i] + uabove_row[j] - utop_left;
int predv = vleft_col[i] + vabove_row[j] - vtop_left;
if (predu < 0)
predu = 0;
if (predu > 255)
predu = 255;
if (predv < 0)
predv = 0;
if (predv > 255)
predv = 255;
upred_ptr[j] = predu;
vpred_ptr[j] = predv;
}
upred_ptr += pred_stride;
vpred_ptr += pred_stride;
}
}
break;
case B_PRED:
case NEARESTMV:
case NEARMV:
case ZEROMV:
case NEWMV:
case SPLITMV:
case MB_MODE_COUNT:
break;
}
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
}
void vp8_init_intra_predictors(void)

View File

@@ -25,6 +25,16 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char *ypred_ptr,
int y_stride);
/* Builds the 8x8 U and V intra predictors of macroblock x.
 *
 * The chroma mode is taken from x->mode_info_context->mbmi.uv_mode; for
 * DC_PRED the variant is chosen from x->left_available / x->up_available.
 * uabove_row / vabove_row point at the row above each plane, uleft / vleft
 * at the first pixel of the left column (8 pixels, left_stride apart), and
 * upred_ptr / vpred_ptr receive the predictions with a row pitch of
 * pred_stride.
 */
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride);
void vp8_init_intra_predictors(void);
#ifdef __cplusplus

View File

@@ -152,9 +152,6 @@ specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/;
add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
specialize qw/vp8_intra4x4_predict media/;
$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;

View File

@@ -114,495 +114,3 @@ sym(vp8_copy_mem16x16_sse2):
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dc_mmx2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; DC prediction using both edges: every byte of the 8x8 destination is set
; to (sum of the 8 above pixels + sum of the 8 left pixels + 8) >> 4.
global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top: psadbw against zero leaves the sum of the 8 above bytes in mm1
mov rdi, arg(2) ;above;
mov rsi, arg(3) ;left;
movsxd rax, dword ptr arg(4) ;left_stride;
pxor mm0, mm0
movq mm1, [rdi]
; rdi = 3 * left_stride, used to address rows 3 and 7 of the left column
lea rdi, [rax*3]
psadbw mm1, mm0
; from left: accumulate the 8 left-column pixels in ecx
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax*1]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
lea rsi, [rsi+rax*4]
add ecx, edx
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
; add up: (top sum + left sum + 8) >> 4, then replicate into 8 bytes
pextrw edx, mm1, 0x0
lea edx, [edx+ecx+8]
sar edx, 4
movd mm1, edx
movsxd rcx, dword ptr arg(1) ;dst_stride
pshufw mm1, mm1, 0x0
mov rdi, arg(0) ;dst;
packuswb mm1, mm1
; write out: rows 0-3 through rdi, rows 4-7 through rdx
lea rax, [rcx*3]
lea rdx, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
movq [rdx ], mm1
movq [rdx+rcx ], mm1
movq [rdx+rcx*2], mm1
movq [rdx+rax ], mm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dctop_mmx2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; DC prediction from the top edge only: every byte of the 8x8 destination
; is set to (sum of the 8 above pixels + 4) >> 3.
global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
;arg(3), arg(4) not used
; from top: psadbw against zero sums the 8 above bytes
mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
; add up: add the rounding constant 4, shift right by 3, replicate
paddw mm1, [GLOBAL(dc_4)]
psraw mm1, 3
pshufw mm1, mm1, 0x0
packuswb mm1, mm1
; write out: rows 0-3, then advance rdi by 4 rows for rows 4-7
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
lea rdi, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dcleft_mmx2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; DC prediction from the left edge only: every byte of the 8x8 destination
; is set to (sum of the 8 left pixels + 4) >> 3.
global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
;arg(2) not used
; from left: accumulate the left-column pixels; rdi = 3 * left_stride
mov rsi, arg(3) ;left;
movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
; the lea adds the eighth pixel and the rounding constant 4 in one step
lea edx, [ecx+edx+4]
; add up: divide by 8 and replicate into 8 bytes
shr edx, 3
movd mm1, edx
pshufw mm1, mm1, 0x0
packuswb mm1, mm1
; write out: rows 0-3, then advance rdi by 4 rows for rows 4-7
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
lea rdi, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dc128_mmx(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; DC prediction with no edge available: every byte of the 8x8 destination
; is set to 128 (loaded from the dc_128 constant).
global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
; end prolog
;arg(2), arg(3), arg(4) not used
; write out: rows 0-3, then advance rax by 4 rows for rows 4-7
movq mm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
movsxd rdx, dword ptr arg(1) ;dst_stride
lea rcx, [rdx*3]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
lea rax, [rax+rdx*4]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
; begin epilog
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_tm_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; TM prediction for an 8x8 block: dst[r][c] is left[r] + above[c] - topleft,
; saturated to 8 bits by packuswb. The macro argument (sse2 or ssse3) picks
; how a single byte is broadcast into 8 words: unpack + shuffle for sse2,
; pshufb with the dc_1024 mask for ssse3.
%macro vp8_intra_pred_uv_tm 1
global sym(vp8_intra_pred_uv_tm_%1) PRIVATE
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
push rbx
; end prolog
; read top row; edx counts 4 loop iterations of 2 rows each
mov edx, 4
mov rsi, arg(2) ;above
movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_1024)]
%endif
movq xmm1, [rsi]
punpcklbw xmm1, xmm0
; set up left ptrs and subtract topleft (the byte at above[-1]);
; afterwards xmm1 holds above[c] - topleft as 8 words
movd xmm3, [rsi-1]
mov rsi, arg(3) ;left;
%ifidn %1, sse2
punpcklbw xmm3, xmm0
pshuflw xmm3, xmm3, 0x0
punpcklqdq xmm3, xmm3
%else
pshufb xmm3, xmm2
%endif
psubw xmm1, xmm3
; set up dest ptrs
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
.vp8_intra_pred_uv_tm_%1_loop:
; load the left pixels of two consecutive rows through bl
mov bl, [rsi]
movd xmm3, ebx
mov bl, [rsi+rax]
movd xmm5, ebx
; broadcast each left pixel to 8 words
%ifidn %1, sse2
punpcklbw xmm3, xmm0
punpcklbw xmm5, xmm0
pshuflw xmm3, xmm3, 0x0
pshuflw xmm5, xmm5, 0x0
punpcklqdq xmm3, xmm3
punpcklqdq xmm5, xmm5
%else
pshufb xmm3, xmm2
pshufb xmm5, xmm2
%endif
; add (above - topleft), pack both rows with unsigned saturation and
; store the low half to the first row, the high half to the second
paddw xmm3, xmm1
paddw xmm5, xmm1
packuswb xmm3, xmm5
movq [rdi ], xmm3
movhps[rdi+rcx], xmm3
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
jnz .vp8_intra_pred_uv_tm_%1_loop
; begin epilog
pop rbx
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
%endmacro
; instantiate vp8_intra_pred_uv_tm_sse2 and vp8_intra_pred_uv_tm_ssse3
vp8_intra_pred_uv_tm sse2
vp8_intra_pred_uv_tm ssse3
;void vp8_intra_pred_uv_ve_mmx(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; Vertical prediction: the 8 bytes at `above` are copied into each of the
; 8 destination rows.
global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
; end prolog
; arg(3), arg(4) not used
; read from top
mov rax, arg(2) ;src;
movq mm1, [rax]
; write out: rows 0-3, then advance rax by 4 rows for rows 4-7
mov rax, arg(0) ;dst;
movsxd rdx, dword ptr arg(1) ;dst_stride
lea rcx, [rdx*3]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
lea rax, [rax+rdx*4]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
; begin epilog
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_ho_mmx2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
;
; Horizontal prediction for an 8x8 block: destination row r is filled with
; the left-column pixel of row r. The macro argument selects the variant:
; mmx2 loops 4 times over 2 rows; ssse3 is unrolled into two groups of
; 4 rows and broadcasts with pshufb and the dc_00001111 mask.
%macro vp8_intra_pred_uv_ho 1
global sym(vp8_intra_pred_uv_ho_%1) PRIVATE
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
push rbx
%ifidn %1, ssse3
GET_GOT rbx
%endif
; end prolog
;arg(2) not used
; read from left and write out
%ifidn %1, mmx2
mov edx, 4
%endif
mov rsi, arg(3) ;left
movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
%ifidn %1, ssse3
; rdx = 3 * dst_stride; xmm2 = shuffle mask (8 x index 0, 8 x index 1)
lea rdx, [rcx*3]
movdqa xmm2, [GLOBAL(dc_00001111)]
%endif
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
; two rows per iteration: load each left pixel through bl and replicate
; it into all 8 bytes of an MMX register
mov bl, [rsi]
movd mm0, ebx
mov bl, [rsi+rax]
movd mm1, ebx
punpcklbw mm0, mm0
punpcklbw mm1, mm1
pshufw mm0, mm0, 0x0
pshufw mm1, mm1, 0x0
movq [rdi ], mm0
movq [rdi+rcx], mm1
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
jnz .vp8_intra_pred_uv_ho_%1_loop
%else
; rows 0-3: interleave the left pixels of two rows, then pshufb spreads
; byte 0 over the low 8 bytes and byte 1 over the high 8 bytes
mov bl, [rsi]
movd xmm0, ebx
mov bl, [rsi+rax]
movd xmm3, ebx
mov bl, [rsi+rax*2]
movd xmm1, ebx
lea rbx, [rax*3]
mov bl, [rsi+rbx]
movd xmm4, ebx
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
pshufb xmm1, xmm2
movq [rdi ], xmm0
movhps [rdi+rcx], xmm0
movq [rdi+rcx*2], xmm1
movhps [rdi+rdx], xmm1
lea rsi, [rsi+rax*4]
lea rdi, [rdi+rcx*4]
; rows 4-7: same sequence after advancing both pointers by 4 rows
mov bl, [rsi]
movd xmm0, ebx
mov bl, [rsi+rax]
movd xmm3, ebx
mov bl, [rsi+rax*2]
movd xmm1, ebx
lea rbx, [rax*3]
mov bl, [rsi+rbx]
movd xmm4, ebx
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
pshufb xmm1, xmm2
movq [rdi ], xmm0
movhps [rdi+rcx], xmm0
movq [rdi+rcx*2], xmm1
movhps [rdi+rdx], xmm1
%endif
; begin epilog
%ifidn %1, ssse3
RESTORE_GOT
%endif
pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
%endmacro
; instantiate vp8_intra_pred_uv_ho_mmx2 and vp8_intra_pred_uv_ho_ssse3
vp8_intra_pred_uv_ho mmx2
vp8_intra_pred_uv_ho ssse3
; Read-only constants used by the chroma intra predictors in this file.
SECTION_RODATA
align 16
; 8 bytes of 128: the DC value when no edge is available
dc_128:
times 8 db 128
; 4 words of 4: rounding term added before the ">> 3" of the top-only DC
dc_4:
times 4 dw 4
align 16
; 8 words of 0x0400, used as a pshufb control by the ssse3 TM predictor:
; each word takes source byte 0 as its low byte and source byte 4 as its
; high byte
dc_1024:
times 8 dw 0x400
align 16
; pshufb control used by the ssse3 horizontal predictor: source byte 0
; repeated 8 times, then source byte 1 repeated 8 times
dc_00001111:
times 8 db 0
times 8 db 1

View File

@@ -1,111 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/blockd.h"
/* Expands to the common signature of the per-plane chroma predictor
 * kernels: destination block and its stride, the above row, and the left
 * column with its stride. `sym` is the declarator being declared, either a
 * function name or a parenthesised pointer declarator. */
#define build_intra_predictors_mbuv_prototype(sym) \
void sym(unsigned char *dst, int dst_stride, \
const unsigned char *above, \
const unsigned char *left, int left_stride)
/* Pointer-to-kernel type built from the prototype above. */
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
/* Kernels defined outside this file; the suffix names the instruction set. */
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
/* Selects the per-plane kernel that matches the macroblock's chroma mode
 * and applies it to the U plane and then to the V plane.
 *
 * tm_func and ho_func are the TM and horizontal kernels of the calling
 * instruction-set variant; the vertical and DC kernels are fixed. For
 * DC_PRED the kernel depends on which of the top / left edges is
 * available. Any other mode returns without writing anything.
 */
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
                                                unsigned char * uabove_row,
                                                unsigned char * vabove_row,
                                                unsigned char *dst_u,
                                                unsigned char *dst_v,
                                                int dst_stride,
                                                unsigned char * uleft,
                                                unsigned char * vleft,
                                                int left_stride,
                                                build_intra_predictors_mbuv_fn_t tm_func,
                                                build_intra_predictors_mbuv_fn_t ho_func)
{
    const int uv_mode = x->mode_info_context->mbmi.uv_mode;
    build_intra_predictors_mbuv_fn_t kernel;

    if (uv_mode == V_PRED)
    {
        kernel = vp8_intra_pred_uv_ve_mmx;
    }
    else if (uv_mode == H_PRED)
    {
        kernel = ho_func;
    }
    else if (uv_mode == TM_PRED)
    {
        kernel = tm_func;
    }
    else if (uv_mode == DC_PRED)
    {
        if (x->up_available)
        {
            kernel = x->left_available ? vp8_intra_pred_uv_dc_mmx2
                                       : vp8_intra_pred_uv_dctop_mmx2;
        }
        else
        {
            kernel = x->left_available ? vp8_intra_pred_uv_dcleft_mmx2
                                       : vp8_intra_pred_uv_dc128_mmx;
        }
    }
    else
    {
        return;
    }

    kernel(dst_u, dst_stride, uabove_row, uleft, left_stride);
    kernel(dst_v, dst_stride, vabove_row, vleft, left_stride);
}
/* SSE2 entry point for the chroma intra predictors: forwards to the shared
 * x86 dispatcher with the SSE2 TM kernel and the MMX2 horizontal kernel. */
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x,
                                            unsigned char * uabove_row,
                                            unsigned char * vabove_row,
                                            unsigned char * uleft,
                                            unsigned char * vleft,
                                            int left_stride,
                                            unsigned char * upred_ptr,
                                            unsigned char * vpred_ptr,
                                            int pred_stride)
{
    const build_intra_predictors_mbuv_fn_t tm_kernel =
        vp8_intra_pred_uv_tm_sse2;
    const build_intra_predictors_mbuv_fn_t ho_kernel =
        vp8_intra_pred_uv_ho_mmx2;

    /* The dispatcher takes the destinations before the left columns. */
    vp8_build_intra_predictors_mbuv_x86(x, uabove_row, vabove_row,
                                        upred_ptr, vpred_ptr, pred_stride,
                                        uleft, vleft, left_stride,
                                        tm_kernel, ho_kernel);
}
/* SSSE3 entry point for the chroma intra predictors: forwards to the shared
 * x86 dispatcher with the SSSE3 TM and horizontal kernels. */
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
                                             unsigned char * uabove_row,
                                             unsigned char * vabove_row,
                                             unsigned char * uleft,
                                             unsigned char * vleft,
                                             int left_stride,
                                             unsigned char * upred_ptr,
                                             unsigned char * vpred_ptr,
                                             int pred_stride)
{
    const build_intra_predictors_mbuv_fn_t tm_kernel =
        vp8_intra_pred_uv_tm_ssse3;
    const build_intra_predictors_mbuv_fn_t ho_kernel =
        vp8_intra_pred_uv_ho_ssse3;

    /* The dispatcher takes the destinations before the left columns. */
    vp8_build_intra_predictors_mbuv_x86(x, uabove_row, vabove_row,
                                        upred_ptr, vpred_ptr, pred_stride,
                                        uleft, vleft, left_stride,
                                        tm_kernel, ho_kernel);
}

View File

@@ -89,7 +89,6 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
@@ -119,7 +118,6 @@ VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/bilinear_filter_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/copymem_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/loopfilter_filters_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/reconintra_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/sixtap_filter_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h
@@ -166,7 +164,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_loopfilter_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c