vp8: change build_intra_predictors_mbuv_s to use vpx_dsp.
Change-Id: I936c2430c3c5b1e0ab5dec0a20110525e925b5e4
This commit is contained in:
@@ -1,317 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
class IntraPredBase {
|
||||
public:
|
||||
virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
void SetupMacroblock(MACROBLOCKD *mbptr,
|
||||
MODE_INFO *miptr,
|
||||
uint8_t *data,
|
||||
int block_size,
|
||||
int stride,
|
||||
int num_planes) {
|
||||
mbptr_ = mbptr;
|
||||
miptr_ = miptr;
|
||||
mbptr_->up_available = 1;
|
||||
mbptr_->left_available = 1;
|
||||
mbptr_->mode_info_context = miptr_;
|
||||
stride_ = stride;
|
||||
block_size_ = block_size;
|
||||
num_planes_ = num_planes;
|
||||
for (int p = 0; p < num_planes; p++)
|
||||
data_ptr_[p] = data + stride * (block_size + 1) * p +
|
||||
stride + block_size;
|
||||
}
|
||||
|
||||
void FillRandom() {
|
||||
// Fill edges with random data
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int p = 0; p < num_planes_; p++) {
|
||||
for (int x = -1 ; x <= block_size_; x++)
|
||||
data_ptr_[p][x - stride_] = rnd.Rand8();
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) = 0;
|
||||
|
||||
void SetLeftUnavailable() {
|
||||
mbptr_->left_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int i = -1; i < block_size_; ++i)
|
||||
data_ptr_[p][stride_ * i - 1] = 129;
|
||||
}
|
||||
|
||||
void SetTopUnavailable() {
|
||||
mbptr_->up_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
|
||||
}
|
||||
|
||||
void SetTopLeftUnavailable() {
|
||||
SetLeftUnavailable();
|
||||
SetTopUnavailable();
|
||||
}
|
||||
|
||||
int BlockSizeLog2Min1() const {
|
||||
switch (block_size_) {
|
||||
case 16:
|
||||
return 3;
|
||||
case 8:
|
||||
return 2;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// check DC prediction output against a reference
|
||||
void CheckDCPrediction() const {
|
||||
for (int p = 0; p < num_planes_; p++) {
|
||||
// calculate expected DC
|
||||
int expected;
|
||||
if (mbptr_->up_available || mbptr_->left_available) {
|
||||
int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
|
||||
mbptr_->left_available;
|
||||
if (mbptr_->up_available)
|
||||
for (int x = 0; x < block_size_; x++)
|
||||
sum += data_ptr_[p][x - stride_];
|
||||
if (mbptr_->left_available)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
sum += data_ptr_[p][y * stride_ - 1];
|
||||
expected = (sum + (1 << (shift - 1))) >> shift;
|
||||
} else {
|
||||
expected = 0x80;
|
||||
}
|
||||
// check that all subsequent lines are equal to the first
|
||||
for (int y = 1; y < block_size_; ++y)
|
||||
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
|
||||
block_size_));
|
||||
// within the first line, ensure that each pixel has the same value
|
||||
for (int x = 1; x < block_size_; ++x)
|
||||
ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
|
||||
// now ensure that that pixel has the expected (DC) value
|
||||
ASSERT_EQ(expected, data_ptr_[p][0]);
|
||||
}
|
||||
}
|
||||
|
||||
// check V prediction output against a reference
|
||||
void CheckVPrediction() const {
|
||||
// check that all lines equal the top border
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
|
||||
&data_ptr_[p][y * stride_], block_size_));
|
||||
}
|
||||
|
||||
// check H prediction output against a reference
|
||||
void CheckHPrediction() const {
|
||||
// for each line, ensure that each pixel is equal to the left border
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
for (int x = 0; x < block_size_; x++)
|
||||
ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
|
||||
data_ptr_[p][x + y * stride_]);
|
||||
}
|
||||
|
||||
static int ClipByte(int value) {
|
||||
if (value > 255)
|
||||
return 255;
|
||||
else if (value < 0)
|
||||
return 0;
|
||||
return value;
|
||||
}
|
||||
|
||||
// check TM prediction output against a reference
|
||||
void CheckTMPrediction() const {
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
for (int x = 0; x < block_size_; x++) {
|
||||
const int expected = ClipByte(data_ptr_[p][x - stride_]
|
||||
+ data_ptr_[p][stride_ * y - 1]
|
||||
- data_ptr_[p][-1 - stride_]);
|
||||
ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
|
||||
}
|
||||
}
|
||||
|
||||
// Actual test
|
||||
void RunTest() {
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED");
|
||||
FillRandom();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED LEFT");
|
||||
FillRandom();
|
||||
SetLeftUnavailable();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED TOP");
|
||||
FillRandom();
|
||||
SetTopUnavailable();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED TOP_LEFT");
|
||||
FillRandom();
|
||||
SetTopLeftUnavailable();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("H_PRED");
|
||||
FillRandom();
|
||||
Predict(H_PRED);
|
||||
CheckHPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("V_PRED");
|
||||
FillRandom();
|
||||
Predict(V_PRED);
|
||||
CheckVPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("TM_PRED");
|
||||
FillRandom();
|
||||
Predict(TM_PRED);
|
||||
CheckTMPrediction();
|
||||
}
|
||||
}
|
||||
|
||||
MACROBLOCKD *mbptr_;
|
||||
MODE_INFO *miptr_;
|
||||
uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
|
||||
int stride_;
|
||||
int block_size_;
|
||||
int num_planes_;
|
||||
};
|
||||
|
||||
// Signature of the chroma (U/V) intra predictor implementations that the
// IntraPredUVTest fixture is parameterised over. The mode to predict is read
// from |x|; the remaining arguments give the above rows, left columns (with
// their stride) and destination pointers (with their stride) for U and V.
typedef void (*IntraPredUvFunc)(MACROBLOCKD *x, uint8_t *uabove_row,
                                uint8_t *vabove_row, uint8_t *uleft,
                                uint8_t *vleft, int left_stride,
                                uint8_t *upred_ptr, uint8_t *vpred_ptr,
                                int pred_stride);
|
||||
|
||||
class IntraPredUVTest
|
||||
: public IntraPredBase,
|
||||
public ::testing::TestWithParam<IntraPredUvFunc> {
|
||||
public:
|
||||
static void SetUpTestCase() {
|
||||
mb_ = reinterpret_cast<MACROBLOCKD*>(
|
||||
vpx_memalign(32, sizeof(MACROBLOCKD)));
|
||||
mi_ = reinterpret_cast<MODE_INFO*>(
|
||||
vpx_memalign(32, sizeof(MODE_INFO)));
|
||||
data_array_ = reinterpret_cast<uint8_t*>(
|
||||
vpx_memalign(kDataAlignment, kDataBufferSize));
|
||||
}
|
||||
|
||||
static void TearDownTestCase() {
|
||||
vpx_free(data_array_);
|
||||
vpx_free(mi_);
|
||||
vpx_free(mb_);
|
||||
data_array_ = NULL;
|
||||
}
|
||||
|
||||
protected:
|
||||
static const int kBlockSize = 8;
|
||||
static const int kDataAlignment = 8;
|
||||
static const int kStride = kBlockSize * 3;
|
||||
// We use 24 so that the data pointer of the first pixel in each row of
|
||||
// each macroblock is 8-byte aligned, and this gives us access to the
|
||||
// top-left and top-right corner pixels belonging to the top-left/right
|
||||
// macroblocks.
|
||||
// We use 9 lines so we have one line above us for top-prediction.
|
||||
// [0] = U, [1] = V
|
||||
static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
|
||||
|
||||
virtual void SetUp() {
|
||||
pred_fn_ = GetParam();
|
||||
SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) {
|
||||
mbptr_->mode_info_context->mbmi.uv_mode = mode;
|
||||
pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
|
||||
data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
|
||||
data_ptr_[0], data_ptr_[1], kStride);
|
||||
}
|
||||
|
||||
IntraPredUvFunc pred_fn_;
|
||||
// We use 24 so that the data pointer of the first pixel in each row of
|
||||
// each macroblock is 8-byte aligned, and this gives us access to the
|
||||
// top-left and top-right corner pixels belonging to the top-left/right
|
||||
// macroblocks.
|
||||
// We use 9 lines so we have one line above us for top-prediction.
|
||||
// [0] = U, [1] = V
|
||||
static uint8_t* data_array_;
|
||||
static MACROBLOCKD* mb_;
|
||||
static MODE_INFO* mi_;
|
||||
};
|
||||
|
||||
MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
|
||||
MODE_INFO* IntraPredUVTest::mi_ = NULL;
|
||||
uint8_t* IntraPredUVTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredUVTest, IntraPredTests) {
|
||||
RunTest();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_c));
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_sse2));
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_ssse3));
|
||||
#endif
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_neon));
|
||||
#endif
|
||||
#if HAVE_MSA
|
||||
INSTANTIATE_TEST_CASE_P(MSA, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_msa));
|
||||
#endif
|
||||
|
||||
} // namespace
|
@@ -111,7 +111,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
|
||||
|
||||
LIBVPX_TEST_SRCS-yes += idct_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += intrapred_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += vpx_scale_test.cc
|
||||
|
||||
|
@@ -1,124 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
/* Builds the 8x8 U and V intra predictors of one macroblock using NEON.
 *
 * x            supplies the mode (mode_info_context->mbmi.uv_mode) and, for
 *              DC_PRED, the up_available / left_available flags.
 * uabove_row,
 * vabove_row   8 pixels above each block; index [-1] (the top-left pixel) is
 *              read for TM_PRED only.
 * uleft, vleft first pixel of each left column; successive rows are
 *              left_stride bytes apart.
 * upred_ptr,
 * vpred_ptr    destinations; successive rows are pred_stride bytes apart.
 *
 * Modes other than DC_PRED, V_PRED, H_PRED and TM_PRED write nothing.
 */
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
                                            unsigned char * uabove_row,
                                            unsigned char * vabove_row,
                                            unsigned char * uleft,
                                            unsigned char * vleft,
                                            int left_stride,
                                            unsigned char * upred_ptr,
                                            unsigned char * vpred_ptr,
                                            int pred_stride) {
  const int mode = x->mode_info_context->mbmi.uv_mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      /* Number of available edges; 0 means "use the constant 128". */
      int shift = x->up_available + x->left_available;
      uint8x8_t v_expected_udc = vdup_n_u8(128);
      uint8x8_t v_expected_vdc = vdup_n_u8(128);

      if (shift) {
        unsigned int average_u = 0;
        unsigned int average_v = 0;
        int expected_udc;
        int expected_vdc;
        if (x->up_available) {
          /* U row in the low half, V row in the high half; three pairwise
           * widening adds leave the U sum in 64-bit lane 0 and the V sum in
           * 64-bit lane 1 (32-bit lanes 0 and 2). */
          const uint8x8_t v_uabove = vld1_u8(uabove_row);
          const uint8x8_t v_vabove = vld1_u8(vabove_row);
          const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove));
          const uint32x4_t b = vpaddlq_u16(a);
          const uint64x2_t c = vpaddlq_u32(b);
          average_u = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 0);
          average_v = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 2);
        }
        if (x->left_available) {
          for (i = 0; i < 8; ++i) {
            average_u += uleft[0];
            uleft += left_stride;
            average_v += vleft[0];
            vleft += left_stride;
          }
        }
        /* 8 samples per edge: divide by 8 or 16 with rounding. */
        shift += 2;
        expected_udc = (average_u + (1 << (shift - 1))) >> shift;
        expected_vdc = (average_v + (1 << (shift - 1))) >> shift;
        v_expected_udc = vmov_n_u8((uint8_t)expected_udc);
        v_expected_vdc = vmov_n_u8((uint8_t)expected_vdc);
      }
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_expected_udc);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_expected_vdc);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case V_PRED:
    {
      /* Every row is a copy of the row above. */
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_uabove);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vabove);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case H_PRED:
    {
      /* Every row is its left pixel replicated 8 times. */
      for (i = 0; i < 8; ++i) {
        const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]);
        const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]);
        uleft += left_stride;
        vleft += left_stride;
        vst1_u8(upred_ptr, v_uleft);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vleft);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* pred = clip(above + left - top_left). Pixel values are kept unsigned
       * until the subtraction so no value ever passes through a narrower
       * signed type. */
      const uint16x8_t v_utop_left = vmovq_n_u16((uint16_t)uabove_row[-1]);
      const uint16x8_t v_vtop_left = vmovq_n_u16((uint16_t)vabove_row[-1]);
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);
      for (i = 0; i < 8; ++i) {
        const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]);
        const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]);
        const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft);
        const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft);
        /* The difference may be negative, so it is taken in signed 16-bit
         * lanes and then narrowed with unsigned saturation. */
        const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u),
                                        vreinterpretq_s16_u16(v_utop_left));
        const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v),
                                        vreinterpretq_s16_u16(v_vtop_left));
        const uint8x8_t pred_u = vqmovun_s16(b_u);
        const uint8x8_t pred_v = vqmovun_s16(b_v);

        vst1_u8(upred_ptr, pred_u);
        vst1_u8(vpred_ptr, pred_v);
        upred_ptr += pred_stride;
        vpred_ptr += pred_stride;
        uleft += left_stride;
        vleft += left_stride;
      }
    }
    break;
  }
}
|
@@ -1,182 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
/* Vertical 8x8 prediction: the 64-bit value loaded from |src| is stored to
 * |dst| by two SD4() calls, the second starting four rows (4 * dst_stride
 * bytes) further on.
 * NOTE(review): SD4 presumably stores its four values to four consecutive
 * rows -- confirm against vp8_macros_msa.h. */
static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride)
{
    uint64_t above = LD(src);
    uint8_t *lower_rows = dst + (4 * dst_stride);

    SD4(above, above, above, above, dst, dst_stride);
    SD4(above, above, above, above, lower_rows, dst_stride);
}
|
||||
|
||||
/* Horizontal 8x8 prediction: for each of 8 rows, the pixel read from |src|
 * (rows are |src_stride| bytes apart) is replicated into all 8 bytes of a
 * 64-bit word, and the 8 words are stored to |dst| by two SD4() calls. */
static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
                                        uint8_t *dst, int32_t dst_stride)
{
    uint64_t rows[8];
    int32_t r;

    /* Multiplying a byte by 0x0101...01 copies it into every byte lane. */
    for (r = 0; r < 8; ++r)
    {
        rows[r] = src[r * src_stride] * 0x0101010101010101ull;
    }

    SD4(rows[0], rows[1], rows[2], rows[3], dst, dst_stride);
    dst += (4 * dst_stride);
    SD4(rows[4], rows[5], rows[6], rows[7], dst, dst_stride);
}
|
||||
|
||||
static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
|
||||
int32_t src_stride_left,
|
||||
uint8_t *dst, int32_t dst_stride,
|
||||
uint8_t is_above, uint8_t is_left)
|
||||
{
|
||||
uint32_t row, addition = 0;
|
||||
uint64_t out;
|
||||
v16u8 src_above, store;
|
||||
v8u16 sum_above;
|
||||
v4u32 sum_top;
|
||||
v2u64 sum;
|
||||
|
||||
if (is_left && is_above)
|
||||
{
|
||||
src_above = LD_UB(src_top);
|
||||
|
||||
sum_above = __msa_hadd_u_h(src_above, src_above);
|
||||
sum_top = __msa_hadd_u_w(sum_above, sum_above);
|
||||
sum = __msa_hadd_u_d(sum_top, sum_top);
|
||||
addition = __msa_copy_u_w((v4i32)sum, 0);
|
||||
|
||||
for (row = 0; row < 8; ++row)
|
||||
{
|
||||
addition += src_left[row * src_stride_left];
|
||||
}
|
||||
|
||||
addition = (addition + 8) >> 4;
|
||||
store = (v16u8)__msa_fill_b(addition);
|
||||
}
|
||||
else if (is_left)
|
||||
{
|
||||
for (row = 0; row < 8; ++row)
|
||||
{
|
||||
addition += src_left[row * src_stride_left];
|
||||
}
|
||||
|
||||
addition = (addition + 4) >> 3;
|
||||
store = (v16u8)__msa_fill_b(addition);
|
||||
}
|
||||
else if (is_above)
|
||||
{
|
||||
src_above = LD_UB(src_top);
|
||||
|
||||
sum_above = __msa_hadd_u_h(src_above, src_above);
|
||||
sum_top = __msa_hadd_u_w(sum_above, sum_above);
|
||||
sum = __msa_hadd_u_d(sum_top, sum_top);
|
||||
sum = (v2u64)__msa_srari_d((v2i64)sum, 3);
|
||||
store = (v16u8)__msa_splati_b((v16i8)sum, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
store = (v16u8)__msa_ldi_b(128);
|
||||
}
|
||||
|
||||
out = __msa_copy_u_d((v2i64)store, 0);
|
||||
|
||||
SD4(out, out, out, out, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
SD4(out, out, out, out, dst, dst_stride);
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x,
|
||||
unsigned char *uabove_row,
|
||||
unsigned char *vabove_row,
|
||||
unsigned char *uleft,
|
||||
unsigned char *vleft,
|
||||
int left_stride,
|
||||
unsigned char *upred_ptr,
|
||||
unsigned char *vpred_ptr,
|
||||
int pred_stride)
|
||||
{
|
||||
uint32_t row, col;
|
||||
uint8_t utop_left = uabove_row[-1];
|
||||
uint8_t vtop_left = vabove_row[-1];
|
||||
|
||||
switch (x->mode_info_context->mbmi.uv_mode)
|
||||
{
|
||||
case DC_PRED:
|
||||
intra_predict_dc_8x8_msa(uabove_row, uleft, left_stride,
|
||||
upred_ptr, pred_stride,
|
||||
x->up_available, x->left_available);
|
||||
intra_predict_dc_8x8_msa(vabove_row, vleft, left_stride,
|
||||
vpred_ptr, pred_stride,
|
||||
x->up_available, x->left_available);
|
||||
break;
|
||||
|
||||
case V_PRED:
|
||||
intra_predict_vert_8x8_msa(uabove_row, upred_ptr, pred_stride);
|
||||
intra_predict_vert_8x8_msa(vabove_row, vpred_ptr, pred_stride);
|
||||
break;
|
||||
|
||||
case H_PRED:
|
||||
intra_predict_horiz_8x8_msa(uleft, left_stride, upred_ptr,
|
||||
pred_stride);
|
||||
intra_predict_horiz_8x8_msa(vleft, left_stride, vpred_ptr,
|
||||
pred_stride);
|
||||
break;
|
||||
|
||||
case TM_PRED:
|
||||
for (row = 0; row < 8; ++row)
|
||||
{
|
||||
for (col = 0; col < 8; ++col)
|
||||
{
|
||||
int predu = uleft[row * left_stride] + uabove_row[col] -
|
||||
utop_left;
|
||||
int predv = vleft[row * left_stride] + vabove_row[col] -
|
||||
vtop_left;
|
||||
|
||||
if (predu < 0)
|
||||
predu = 0;
|
||||
|
||||
if (predu > 255)
|
||||
predu = 255;
|
||||
|
||||
if (predv < 0)
|
||||
predv = 0;
|
||||
|
||||
if (predv > 255)
|
||||
predv = 255;
|
||||
|
||||
upred_ptr[col] = predu;
|
||||
vpred_ptr[col] = predv;
|
||||
}
|
||||
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
break;
|
||||
|
||||
case B_PRED:
|
||||
case NEARESTMV:
|
||||
case NEARMV:
|
||||
case ZEROMV:
|
||||
case NEWMV:
|
||||
case SPLITMV:
|
||||
case MB_MODE_COUNT:
|
||||
break;
|
||||
}
|
||||
}
|
@@ -17,22 +17,32 @@
|
||||
#include "blockd.h"
|
||||
#include "vp8/common/reconintra.h"
|
||||
|
||||
enum {
|
||||
SIZE_16,
|
||||
SIZE_8,
|
||||
NUM_SIZES,
|
||||
};
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
static intra_pred_fn pred[4];
|
||||
static intra_pred_fn dc_pred[2][2];
|
||||
static intra_pred_fn pred[4][NUM_SIZES];
|
||||
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
|
||||
|
||||
static void vp8_init_intra_predictors_internal(void)
|
||||
{
|
||||
pred[V_PRED] = vpx_v_predictor_16x16;
|
||||
pred[H_PRED] = vpx_h_predictor_16x16;
|
||||
pred[TM_PRED] = vpx_tm_predictor_16x16;
|
||||
#define INIT_SIZE(sz) \
|
||||
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
|
||||
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
|
||||
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
|
||||
\
|
||||
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
|
||||
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
|
||||
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
|
||||
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
|
||||
|
||||
dc_pred[0][0] = vpx_dc_128_predictor_16x16;
|
||||
dc_pred[0][1] = vpx_dc_top_predictor_16x16;
|
||||
dc_pred[1][0] = vpx_dc_left_predictor_16x16;
|
||||
dc_pred[1][1] = vpx_dc_predictor_16x16;
|
||||
INIT_SIZE(16);
|
||||
INIT_SIZE(8);
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
@@ -45,6 +55,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
|
||||
unsigned char yleft_col[16];
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
@@ -53,154 +64,49 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
|
||||
if (mode == DC_PRED)
|
||||
{
|
||||
dc_pred[x->left_available][x->up_available](ypred_ptr, y_stride,
|
||||
yabove_row, yleft_col);
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
|
||||
}
|
||||
else
|
||||
{
|
||||
pred[mode](ypred_ptr, y_stride, yabove_row, yleft_col);
|
||||
fn = pred[mode][SIZE_16];
|
||||
}
|
||||
|
||||
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride)
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
|
||||
unsigned char uleft_col[8];
|
||||
unsigned char utop_left = uabove_row[-1];
|
||||
unsigned char vleft_col[8];
|
||||
unsigned char vtop_left = vabove_row[-1];
|
||||
|
||||
int i, j;
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
uleft_col[i] = uleft [i* left_stride];
|
||||
vleft_col[i] = vleft [i* left_stride];
|
||||
uleft_col[i] = uleft[i * left_stride];
|
||||
vleft_col[i] = vleft[i * left_stride];
|
||||
}
|
||||
|
||||
switch (x->mode_info_context->mbmi.uv_mode)
|
||||
if (uvmode == DC_PRED)
|
||||
{
|
||||
case DC_PRED:
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
|
||||
}
|
||||
else
|
||||
{
|
||||
int expected_udc;
|
||||
int expected_vdc;
|
||||
int shift;
|
||||
int Uaverage = 0;
|
||||
int Vaverage = 0;
|
||||
|
||||
if (x->up_available)
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
Uaverage += uabove_row[i];
|
||||
Vaverage += vabove_row[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (x->left_available)
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
Uaverage += uleft_col[i];
|
||||
Vaverage += vleft_col[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (!x->up_available && !x->left_available)
|
||||
{
|
||||
expected_udc = 128;
|
||||
expected_vdc = 128;
|
||||
}
|
||||
else
|
||||
{
|
||||
shift = 2 + x->up_available + x->left_available;
|
||||
expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
|
||||
expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
|
||||
}
|
||||
|
||||
|
||||
/*memset(upred_ptr,expected_udc,64);*/
|
||||
/*memset(vpred_ptr,expected_vdc,64);*/
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
memset(upred_ptr, expected_udc, 8);
|
||||
memset(vpred_ptr, expected_vdc, 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case V_PRED:
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
memcpy(upred_ptr, uabove_row, 8);
|
||||
memcpy(vpred_ptr, vabove_row, 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case H_PRED:
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
memset(upred_ptr, uleft_col[i], 8);
|
||||
memset(vpred_ptr, vleft_col[i], 8);
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
fn = pred[uvmode][SIZE_8];
|
||||
}
|
||||
|
||||
break;
|
||||
case TM_PRED:
|
||||
{
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
for (j = 0; j < 8; j++)
|
||||
{
|
||||
int predu = uleft_col[i] + uabove_row[j] - utop_left;
|
||||
int predv = vleft_col[i] + vabove_row[j] - vtop_left;
|
||||
|
||||
if (predu < 0)
|
||||
predu = 0;
|
||||
|
||||
if (predu > 255)
|
||||
predu = 255;
|
||||
|
||||
if (predv < 0)
|
||||
predv = 0;
|
||||
|
||||
if (predv > 255)
|
||||
predv = 255;
|
||||
|
||||
upred_ptr[j] = predu;
|
||||
vpred_ptr[j] = predv;
|
||||
}
|
||||
|
||||
upred_ptr += pred_stride;
|
||||
vpred_ptr += pred_stride;
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case B_PRED:
|
||||
case NEARESTMV:
|
||||
case NEARMV:
|
||||
case ZEROMV:
|
||||
case NEWMV:
|
||||
case SPLITMV:
|
||||
case MB_MODE_COUNT:
|
||||
break;
|
||||
}
|
||||
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
|
||||
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
|
||||
}
|
||||
|
||||
void vp8_init_intra_predictors(void)
|
||||
|
@@ -25,6 +25,16 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char *ypred_ptr,
|
||||
int y_stride);
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride);
|
||||
|
||||
void vp8_init_intra_predictors(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -152,9 +152,6 @@ specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
|
||||
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
|
||||
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
|
||||
|
||||
add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
|
||||
specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/;
|
||||
|
||||
add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
|
||||
specialize qw/vp8_intra4x4_predict media/;
|
||||
$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
|
||||
|
@@ -114,495 +114,3 @@ sym(vp8_copy_mem16x16_sse2):
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; DC predictor using both edges.
; Fills 8 rows of 8 bytes at dst (rows dst_stride apart) with the single value
;   (sum of the 8 bytes at above + sum of 8 left bytes + 8) >> 4
; where the left bytes are read at left + i*left_stride for i = 0..7.
; NOTE(review): MMX registers are used and no emms is issued in this routine;
; presumably the caller is responsible for clearing MMX state - confirm.
;void vp8_intra_pred_uv_dc_mmx2(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dc_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; from top
|
||||
mov rdi, arg(2) ;above;
|
||||
mov rsi, arg(3) ;left;
|
||||
movsxd rax, dword ptr arg(4) ;left_stride;
|
||||
pxor mm0, mm0
|
||||
movq mm1, [rdi]
|
||||
; rdi is reused from here on as 3 * left_stride
|
||||
lea rdi, [rax*3]
|
||||
; sum of absolute differences against zero = sum of the 8 above bytes
|
||||
psadbw mm1, mm0
|
||||
; from left
|
||||
; ecx accumulates the 8 left-column bytes, 4 rows before and 4 after the
|
||||
; rsi advance below
|
||||
movzx ecx, byte [rsi]
|
||||
movzx edx, byte [rsi+rax*1]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2]
|
||||
add ecx, edx
|
||||
|
||||
movzx edx, byte [rsi+rdi]
|
||||
lea rsi, [rsi+rax*4]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi]
|
||||
add ecx, edx
|
||||
|
||||
; add up
|
||||
; edx = (above_sum + left_sum + 8) >> 4, i.e. the rounded mean of 16 samples
|
||||
pextrw edx, mm1, 0x0
|
||||
lea edx, [edx+ecx+8]
|
||||
sar edx, 4
|
||||
movd mm1, edx
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
; broadcast word 0 to all four words, then pack to get 8 identical bytes
|
||||
pshufw mm1, mm1, 0x0
|
||||
mov rdi, arg(0) ;dst;
|
||||
packuswb mm1, mm1
|
||||
|
||||
; write out
|
||||
; rax = 3 * dst_stride, rdx = dst + 4 * dst_stride (start of rows 4..7)
|
||||
lea rax, [rcx*3]
|
||||
lea rdx, [rdi+rcx*4]
|
||||
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
movq [rdx ], mm1
|
||||
movq [rdx+rcx ], mm1
|
||||
movq [rdx+rcx*2], mm1
|
||||
movq [rdx+rax ], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; DC predictor using only the above row.
; Fills 8 rows of 8 bytes at dst (rows dst_stride apart) with
;   (sum of the 8 bytes at above + 4) >> 3.
; The left / left_stride arguments are accepted but never read.
;void vp8_intra_pred_uv_dctop_mmx2(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dctop_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
;arg(3), arg(4) not used
|
||||
|
||||
; from top
|
||||
mov rsi, arg(2) ;above;
|
||||
pxor mm0, mm0
|
||||
movq mm1, [rsi]
|
||||
; sum of absolute differences against zero = sum of the 8 above bytes
|
||||
psadbw mm1, mm0
|
||||
|
||||
; add up
|
||||
; dc_4 holds the rounding term 4 in every word; (sum + 4) >> 3
|
||||
paddw mm1, [GLOBAL(dc_4)]
|
||||
psraw mm1, 3
|
||||
; broadcast word 0 to all four words, then pack to get 8 identical bytes
|
||||
pshufw mm1, mm1, 0x0
|
||||
packuswb mm1, mm1
|
||||
|
||||
; write out
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
; rax = 3 * dst_stride
|
||||
lea rax, [rcx*3]
|
||||
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
; advance to row 4 and store rows 4..7
|
||||
lea rdi, [rdi+rcx*4]
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; DC predictor using only the left column.
; Fills 8 rows of 8 bytes at dst (rows dst_stride apart) with
;   (sum of 8 left bytes + 4) >> 3
; where the left bytes are read at left + i*left_stride for i = 0..7.
; The above argument is accepted but never read.
;void vp8_intra_pred_uv_dcleft_mmx2(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dcleft_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
;arg(2) not used
|
||||
|
||||
; from left
|
||||
mov rsi, arg(3) ;left;
|
||||
movsxd rax, dword ptr arg(4) ;left_stride;
|
||||
; rdi = 3 * left_stride
|
||||
lea rdi, [rax*3]
|
||||
; ecx accumulates the first seven left bytes
|
||||
movzx ecx, byte [rsi]
|
||||
movzx edx, byte [rsi+rax]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi]
|
||||
add ecx, edx
|
||||
lea rsi, [rsi+rax*4]
|
||||
movzx edx, byte [rsi]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2]
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi]
|
||||
; fold in the eighth byte and the rounding term 4 in one lea
|
||||
lea edx, [ecx+edx+4]
|
||||
|
||||
; add up
|
||||
shr edx, 3
|
||||
movd mm1, edx
|
||||
; broadcast word 0 to all four words, then pack to get 8 identical bytes
|
||||
pshufw mm1, mm1, 0x0
|
||||
packuswb mm1, mm1
|
||||
|
||||
; write out
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
; rax = 3 * dst_stride
|
||||
lea rax, [rcx*3]
|
||||
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
; advance to row 4 and store rows 4..7
|
||||
lea rdi, [rdi+rcx*4]
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; DC predictor used when neither edge is read.
; Fills 8 rows of 8 bytes at dst (rows dst_stride apart) with the constant 128
; loaded from dc_128. above, left and left_stride are accepted but never read.
;void vp8_intra_pred_uv_dc128_mmx(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE
|
||||
sym(vp8_intra_pred_uv_dc128_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
; end prolog
|
||||
|
||||
;arg(2), arg(3), arg(4) not used
|
||||
|
||||
; write out
|
||||
; mm1 = eight bytes of 128
|
||||
movq mm1, [GLOBAL(dc_128)]
|
||||
mov rax, arg(0) ;dst;
|
||||
movsxd rdx, dword ptr arg(1) ;dst_stride
|
||||
; rcx = 3 * dst_stride
|
||||
lea rcx, [rdx*3]
|
||||
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
; advance to row 4 and store rows 4..7
|
||||
lea rax, [rax+rdx*4]
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
|
||||
; begin epilog
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; TM predictor, generated by a macro for two instruction-set variants
; (vp8_intra_pred_uv_tm_sse2 and vp8_intra_pred_uv_tm_ssse3).
; For each of 8 rows r and 8 columns c it stores, saturated to 0..255,
;   left[r*left_stride] + above[c] - above[-1]
; at dst + r*dst_stride + c. The two variants differ only in how a single
; byte is broadcast to eight 16-bit words.
;void vp8_intra_pred_uv_tm_<sse2|ssse3>(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
%macro vp8_intra_pred_uv_tm 1
|
||||
global sym(vp8_intra_pred_uv_tm_%1) PRIVATE
|
||||
sym(vp8_intra_pred_uv_tm_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
; end prolog
|
||||
|
||||
; read top row
|
||||
; edx = loop counter: 4 iterations, two rows per iteration
|
||||
mov edx, 4
|
||||
mov rsi, arg(2) ;above
|
||||
movsxd rax, dword ptr arg(4) ;left_stride;
|
||||
pxor xmm0, xmm0
|
||||
%ifidn %1, ssse3
|
||||
; xmm2 = pshufb mask that spreads source byte 0 into the low byte of every
|
||||
; word (the high byte selects source byte 4, which is zero after a movd load)
|
||||
movdqa xmm2, [GLOBAL(dc_1024)]
|
||||
%endif
|
||||
; xmm1 = the 8 above bytes widened to 16-bit words
|
||||
movq xmm1, [rsi]
|
||||
punpcklbw xmm1, xmm0
|
||||
|
||||
; set up left ptrs and subtract topleft
|
||||
; the top-left sample is the byte immediately before the above row
|
||||
movd xmm3, [rsi-1]
|
||||
mov rsi, arg(3) ;left;
|
||||
%ifidn %1, sse2
|
||||
punpcklbw xmm3, xmm0
|
||||
pshuflw xmm3, xmm3, 0x0
|
||||
punpcklqdq xmm3, xmm3
|
||||
%else
|
||||
pshufb xmm3, xmm2
|
||||
%endif
|
||||
; xmm1 = above[c] - topleft for c = 0..7; loop-invariant
|
||||
psubw xmm1, xmm3
|
||||
|
||||
; set up dest ptrs
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
|
||||
.vp8_intra_pred_uv_tm_%1_loop:
|
||||
; only bl is loaded; the upper bytes of ebx are discarded by the broadcast
|
||||
; sequences below, which use byte 0 only
|
||||
mov bl, [rsi]
|
||||
movd xmm3, ebx
|
||||
|
||||
mov bl, [rsi+rax]
|
||||
movd xmm5, ebx
|
||||
%ifidn %1, sse2
|
||||
punpcklbw xmm3, xmm0
|
||||
punpcklbw xmm5, xmm0
|
||||
pshuflw xmm3, xmm3, 0x0
|
||||
pshuflw xmm5, xmm5, 0x0
|
||||
punpcklqdq xmm3, xmm3
|
||||
punpcklqdq xmm5, xmm5
|
||||
%else
|
||||
pshufb xmm3, xmm2
|
||||
pshufb xmm5, xmm2
|
||||
%endif
|
||||
paddw xmm3, xmm1
|
||||
paddw xmm5, xmm1
|
||||
; saturating pack: low 8 bytes = current row, high 8 bytes = next row
|
||||
packuswb xmm3, xmm5
|
||||
movq [rdi ], xmm3
|
||||
movhps[rdi+rcx], xmm3
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rcx*2]
|
||||
dec edx
|
||||
jnz .vp8_intra_pred_uv_tm_%1_loop
|
||||
|
||||
; begin epilog
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
vp8_intra_pred_uv_tm sse2
|
||||
vp8_intra_pred_uv_tm ssse3
|
||||
|
||||
; Vertical predictor.
; Copies the 8 bytes at above into each of 8 rows at dst (rows dst_stride
; apart). left and left_stride are accepted but never read.
;void vp8_intra_pred_uv_ve_mmx(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE
|
||||
sym(vp8_intra_pred_uv_ve_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
; end prolog
|
||||
|
||||
; arg(3), arg(4) not used
|
||||
|
||||
; read from top
|
||||
mov rax, arg(2) ;above;
|
||||
|
||||
movq mm1, [rax]
|
||||
|
||||
; write out
|
||||
mov rax, arg(0) ;dst;
|
||||
movsxd rdx, dword ptr arg(1) ;dst_stride
|
||||
; rcx = 3 * dst_stride
|
||||
lea rcx, [rdx*3]
|
||||
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
; advance to row 4 and store rows 4..7
|
||||
lea rax, [rax+rdx*4]
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; Horizontal predictor, generated by a macro for two instruction-set variants
; (vp8_intra_pred_uv_ho_mmx2 and vp8_intra_pred_uv_ho_ssse3).
; Row r of the 8-row output (at dst + r*dst_stride) is filled with 8 copies of
; the byte at left + r*left_stride. above is accepted but never read.
;   mmx2 : loop of 4 iterations, two rows per iteration.
;   ssse3: fully unrolled, two groups of four rows.
;void vp8_intra_pred_uv_ho_<mmx2|ssse3>(
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; unsigned char *above,
|
||||
; unsigned char *left,
|
||||
; int left_stride
|
||||
; )
|
||||
%macro vp8_intra_pred_uv_ho 1
|
||||
global sym(vp8_intra_pred_uv_ho_%1) PRIVATE
|
||||
sym(vp8_intra_pred_uv_ho_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
%ifidn %1, ssse3
|
||||
GET_GOT rbx
|
||||
%endif
|
||||
; end prolog
|
||||
|
||||
;arg(2) not used
|
||||
|
||||
; read from left and write out
|
||||
%ifidn %1, mmx2
|
||||
; edx = loop counter for the mmx2 variant
|
||||
mov edx, 4
|
||||
%endif
|
||||
mov rsi, arg(3) ;left
|
||||
movsxd rax, dword ptr arg(4) ;left_stride;
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
%ifidn %1, ssse3
|
||||
; rdx = 3 * dst_stride; xmm2 = pshufb mask (8 x index 0, then 8 x index 1).
|
||||
; The constant is loaded before rbx/bl is overwritten below.
|
||||
lea rdx, [rcx*3]
|
||||
movdqa xmm2, [GLOBAL(dc_00001111)]
|
||||
%endif
|
||||
|
||||
%ifidn %1, mmx2
|
||||
.vp8_intra_pred_uv_ho_%1_loop:
|
||||
mov bl, [rsi]
|
||||
movd mm0, ebx
|
||||
|
||||
mov bl, [rsi+rax]
|
||||
movd mm1, ebx
|
||||
|
||||
; duplicate byte 0 into word 0, then broadcast word 0: 8 copies of the byte
|
||||
punpcklbw mm0, mm0
|
||||
punpcklbw mm1, mm1
|
||||
pshufw mm0, mm0, 0x0
|
||||
pshufw mm1, mm1, 0x0
|
||||
movq [rdi ], mm0
|
||||
movq [rdi+rcx], mm1
|
||||
lea rsi, [rsi+rax*2]
|
||||
lea rdi, [rdi+rcx*2]
|
||||
dec edx
|
||||
jnz .vp8_intra_pred_uv_ho_%1_loop
|
||||
%else
|
||||
; rows 0..3: load four left bytes into the low byte of xmm0/xmm3/xmm1/xmm4
|
||||
mov bl, [rsi]
|
||||
movd xmm0, ebx
|
||||
|
||||
mov bl, [rsi+rax]
|
||||
movd xmm3, ebx
|
||||
|
||||
mov bl, [rsi+rax*2]
|
||||
movd xmm1, ebx
|
||||
|
||||
; rbx temporarily holds 3 * left_stride for the address, then bl the byte
|
||||
lea rbx, [rax*3]
|
||||
mov bl, [rsi+rbx]
|
||||
movd xmm4, ebx
|
||||
|
||||
; interleave so byte 0 / byte 1 hold two consecutive rows' left samples;
|
||||
; the shuffle then fills the low half with byte 0 and the high half with byte 1
|
||||
punpcklbw xmm0, xmm3
|
||||
punpcklbw xmm1, xmm4
|
||||
pshufb xmm0, xmm2
|
||||
pshufb xmm1, xmm2
|
||||
movq [rdi ], xmm0
|
||||
movhps [rdi+rcx], xmm0
|
||||
movq [rdi+rcx*2], xmm1
|
||||
movhps [rdi+rdx], xmm1
|
||||
lea rsi, [rsi+rax*4]
|
||||
lea rdi, [rdi+rcx*4]
|
||||
|
||||
; rows 4..7: same sequence as above on the advanced pointers
|
||||
mov bl, [rsi]
|
||||
movd xmm0, ebx
|
||||
|
||||
mov bl, [rsi+rax]
|
||||
movd xmm3, ebx
|
||||
|
||||
mov bl, [rsi+rax*2]
|
||||
movd xmm1, ebx
|
||||
|
||||
lea rbx, [rax*3]
|
||||
mov bl, [rsi+rbx]
|
||||
movd xmm4, ebx
|
||||
|
||||
punpcklbw xmm0, xmm3
|
||||
punpcklbw xmm1, xmm4
|
||||
pshufb xmm0, xmm2
|
||||
pshufb xmm1, xmm2
|
||||
movq [rdi ], xmm0
|
||||
movhps [rdi+rcx], xmm0
|
||||
movq [rdi+rcx*2], xmm1
|
||||
movhps [rdi+rdx], xmm1
|
||||
%endif
|
||||
|
||||
; begin epilog
|
||||
%ifidn %1, ssse3
|
||||
RESTORE_GOT
|
||||
%endif
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
vp8_intra_pred_uv_ho mmx2
|
||||
vp8_intra_pred_uv_ho ssse3
|
||||
|
||||
; Read-only constants used by the predictors in this file.
SECTION_RODATA
|
||||
align 16
|
||||
; eight bytes of 128: the fill value stored by vp8_intra_pred_uv_dc128_mmx
|
||||
dc_128:
|
||||
times 8 db 128
|
||||
; four words of 4: rounding term added in vp8_intra_pred_uv_dctop_mmx2
|
||||
dc_4:
|
||||
times 4 dw 4
|
||||
align 16
|
||||
; eight words of 0x0400, i.e. byte pairs (0x00, 0x04): pshufb mask used by the
|
||||
; ssse3 TM predictor to spread source byte 0 across the low byte of each word
|
||||
dc_1024:
|
||||
times 8 dw 0x400
|
||||
align 16
|
||||
; pshufb mask used by the ssse3 horizontal predictor: low 8 bytes select
|
||||
; source byte 0, high 8 bytes select source byte 1
|
||||
dc_00001111:
|
||||
times 8 db 0
|
||||
|
||||
times 8 db 1
|
||||
|
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
/* Expands to the common signature shared by every per-plane predictor:
 * void sym(dst, dst_stride, above, left, left_stride). Used below both to
 * declare the assembly entry points and to build the function-pointer type. */
#define build_intra_predictors_mbuv_prototype(sym) \
|
||||
void sym(unsigned char *dst, int dst_stride, \
|
||||
const unsigned char *above, \
|
||||
const unsigned char *left, int left_stride)
|
||||
/* Pointer to a function with the predictor signature above. */
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
|
||||
|
||||
/* Predictor entry points referenced by the dispatcher and wrappers below. */
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
|
||||
|
||||
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_stride,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
build_intra_predictors_mbuv_fn_t tm_func,
|
||||
build_intra_predictors_mbuv_fn_t ho_func)
|
||||
{
|
||||
int mode = x->mode_info_context->mbmi.uv_mode;
|
||||
build_intra_predictors_mbuv_fn_t fn;
|
||||
|
||||
switch (mode) {
|
||||
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
|
||||
case H_PRED: fn = ho_func; break;
|
||||
case TM_PRED: fn = tm_func; break;
|
||||
case DC_PRED:
|
||||
if (x->up_available) {
|
||||
if (x->left_available) {
|
||||
fn = vp8_intra_pred_uv_dc_mmx2; break;
|
||||
} else {
|
||||
fn = vp8_intra_pred_uv_dctop_mmx2; break;
|
||||
}
|
||||
} else if (x->left_available) {
|
||||
fn = vp8_intra_pred_uv_dcleft_mmx2; break;
|
||||
} else {
|
||||
fn = vp8_intra_pred_uv_dc128_mmx; break;
|
||||
}
|
||||
break;
|
||||
default: return;
|
||||
}
|
||||
|
||||
fn(dst_u, dst_stride, uabove_row, uleft, left_stride);
|
||||
fn(dst_v, dst_stride, vabove_row, vleft, left_stride);
|
||||
}
|
||||
|
||||
/* SSE2 entry point for vp8_build_intra_predictors_mbuv_s: forwards to the
 * shared x86 dispatcher with the SSE2 TM routine and the MMX2 horizontal
 * routine. */
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x,
                                            unsigned char *uabove_row,
                                            unsigned char *vabove_row,
                                            unsigned char *uleft,
                                            unsigned char *vleft,
                                            int left_stride,
                                            unsigned char *upred_ptr,
                                            unsigned char *vpred_ptr,
                                            int pred_stride)
{
    const build_intra_predictors_mbuv_fn_t tm_predictor =
        vp8_intra_pred_uv_tm_sse2;
    const build_intra_predictors_mbuv_fn_t ho_predictor =
        vp8_intra_pred_uv_ho_mmx2;

    /* The dispatcher takes destinations before the left-edge arguments. */
    vp8_build_intra_predictors_mbuv_x86(x, uabove_row, vabove_row,
                                        upred_ptr, vpred_ptr, pred_stride,
                                        uleft, vleft, left_stride,
                                        tm_predictor, ho_predictor);
}
|
||||
|
||||
/* SSSE3 entry point for vp8_build_intra_predictors_mbuv_s: forwards to the
 * shared x86 dispatcher with the SSSE3 TM and horizontal routines. */
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
                                             unsigned char *uabove_row,
                                             unsigned char *vabove_row,
                                             unsigned char *uleft,
                                             unsigned char *vleft,
                                             int left_stride,
                                             unsigned char *upred_ptr,
                                             unsigned char *vpred_ptr,
                                             int pred_stride)
{
    const build_intra_predictors_mbuv_fn_t tm_predictor =
        vp8_intra_pred_uv_tm_ssse3;
    const build_intra_predictors_mbuv_fn_t ho_predictor =
        vp8_intra_pred_uv_ho_ssse3;

    /* The dispatcher takes destinations before the left-edge arguments. */
    vp8_build_intra_predictors_mbuv_x86(x, uabove_row, vabove_row,
                                        upred_ptr, vpred_ptr, pred_stride,
                                        uleft, vleft, left_stride,
                                        tm_predictor, ho_predictor);
}
|
@@ -89,7 +89,6 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
|
||||
@@ -119,7 +118,6 @@ VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/bilinear_filter_msa.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/copymem_msa.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct_msa.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/loopfilter_filters_msa.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/reconintra_msa.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/sixtap_filter_msa.c
|
||||
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h
|
||||
|
||||
@@ -166,7 +164,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_loopfilter_neon.c
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c
|
||||
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c
|
||||
|
||||
|
Reference in New Issue
Block a user