vp8: change build_intra_predictors_mby_s to use vpx_dsp.

Change-Id: I2000820e0c04de2c975d370a0cf7145330289bb2
This commit is contained in:
Ronald S. Bultje 2015-09-30 11:07:35 -04:00
parent aeae7fc903
commit 54d48955f6
20 changed files with 324 additions and 1247 deletions

View File

@ -216,95 +216,6 @@ class IntraPredBase {
int num_planes_;
};
// Signature shared by every 16x16 luma intra-prediction entry point that
// IntraPredYTest is instantiated with: the macroblock descriptor, the row
// above, the left column (with its stride) and the destination (with its
// stride).
typedef void (*IntraPredYFunc)(MACROBLOCKD *x, uint8_t *yabove_row,
                               uint8_t *yleft, int left_stride,
                               uint8_t *ypred_ptr, int y_stride);
// Value-parameterized fixture for the whole-macroblock (16x16) luma intra
// predictors. The parameter is the prediction function under test.
class IntraPredYTest
    : public IntraPredBase,
      public ::testing::TestWithParam<IntraPredYFunc> {
 public:
  // Allocates the macroblock descriptor, its mode info and the pixel buffer
  // once for the whole test case.
  static void SetUpTestCase() {
    mb_ = reinterpret_cast<MACROBLOCKD*>(
        vpx_memalign(32, sizeof(MACROBLOCKD)));
    mi_ = reinterpret_cast<MODE_INFO*>(
        vpx_memalign(32, sizeof(MODE_INFO)));
    data_array_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

  // Releases everything SetUpTestCase() allocated and clears all three
  // pointers, so that none of the static members is left dangling.
  static void TearDownTestCase() {
    vpx_free(data_array_);
    vpx_free(mi_);
    vpx_free(mb_);
    data_array_ = NULL;
    mi_ = NULL;
    mb_ = NULL;
  }

 protected:
  static const int kBlockSize = 16;
  static const int kDataAlignment = 16;
  static const int kStride = kBlockSize * 3;
  // We use 48 so that the data pointer of the first pixel in each row of
  // each macroblock is 16-byte aligned, and this gives us access to the
  // top-left and top-right corner pixels belonging to the top-left/right
  // macroblocks.
  // We use 17 lines so we have one line above us for top-prediction.
  static const int kDataBufferSize = kStride * (kBlockSize + 1);

  // Picks up the function under test and (re)initializes the shared
  // macroblock state for a single luma plane.
  virtual void SetUp() {
    pred_fn_ = GetParam();
    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
  }

  // Runs the predictor for |mode|: the row above starts one stride before the
  // block, the left column one pixel before it, and the prediction is written
  // in place over the block itself.
  virtual void Predict(MB_PREDICTION_MODE mode) {
    mbptr_->mode_info_context->mbmi.mode = mode;
    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
                                      data_ptr_[0] - kStride,
                                      data_ptr_[0] - 1, kStride,
                                      data_ptr_[0], kStride));
  }

  IntraPredYFunc pred_fn_;
  static uint8_t* data_array_;
  static MACROBLOCKD * mb_;
  static MODE_INFO *mi_;
};
// Storage for the fixture's shared buffers, listed in declaration order.
// They are allocated in SetUpTestCase() and released in TearDownTestCase().
uint8_t* IntraPredYTest::data_array_ = NULL;
MACROBLOCKD* IntraPredYTest::mb_ = NULL;
MODE_INFO* IntraPredYTest::mi_ = NULL;
// Single parameterized test body: everything is delegated to RunTest(), which
// this fixture does not define itself (presumably inherited from
// IntraPredBase -- confirm).
TEST_P(IntraPredYTest, IntraPredTests) {
RunTest();
}
// The C implementation is always instantiated.
INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_c));
// Each specialization below is only instantiated when the matching HAVE_*
// configuration macro is non-zero.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_sse2));
#endif
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_ssse3));
#endif
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_neon));
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, IntraPredYTest,
::testing::Values(
vp8_build_intra_predictors_mby_s_msa));
#endif
typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
uint8_t *uabove_row,
uint8_t *vabove_row,

View File

@ -12,92 +12,6 @@
#include "vp8/common/blockd.h"
/*
 * NEON 16x16 luma intra predictor.
 *
 * x           - macroblock descriptor; supplies the prediction mode and the
 *               up_available / left_available flags used by DC_PRED.
 * yabove_row  - 16 pixels of the row above the block (yabove_row[-1] is the
 *               top-left pixel, read only for TM_PRED).
 * yleft       - first pixel of the column left of the block.
 * left_stride - distance in bytes between consecutive left-column pixels.
 * ypred_ptr   - destination of the 16x16 prediction.
 * y_stride    - distance in bytes between consecutive destination rows.
 *
 * Modes other than DC_PRED, V_PRED, H_PRED and TM_PRED write nothing.
 */
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x,
                                           unsigned char * yabove_row,
                                           unsigned char * yleft,
                                           int left_stride,
                                           unsigned char * ypred_ptr,
                                           int y_stride) {
  const int mode = x->mode_info_context->mbmi.mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      /* Number of available edges (0, 1 or 2). */
      int shift = x->up_available + x->left_available;
      /* With no edge available the prediction is the mid-grey value 128. */
      uint8x16_t v_expected_dc = vdupq_n_u8(128);

      if (shift) {
        unsigned int average = 0;
        int expected_dc;

        if (x->up_available) {
          /* Horizontal sum of the 16 pixels above via pairwise widening adds. */
          const uint8x16_t v_above = vld1q_u8(yabove_row);
          const uint16x8_t a = vpaddlq_u8(v_above);
          const uint32x4_t b = vpaddlq_u16(a);
          const uint64x2_t c = vpaddlq_u32(b);
          const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
                                        vreinterpret_u32_u64(vget_high_u64(c)));
          average = vget_lane_u32(d, 0);
        }

        if (x->left_available) {
          for (i = 0; i < 16; ++i) {
            average += yleft[0];
            yleft += left_stride;
          }
        }

        /* Divide by 16 (one edge) or 32 (two edges), rounding to nearest. */
        shift += 3;
        expected_dc = (average + (1 << (shift - 1))) >> shift;
        v_expected_dc = vmovq_n_u8((uint8_t)expected_dc);
      }

      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_expected_dc);
        ypred_ptr += y_stride;
      }
    }
    break;
    case V_PRED:
    {
      /* Every row is a copy of the row above. */
      const uint8x16_t v_above = vld1q_u8(yabove_row);

      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_above);
        ypred_ptr += y_stride;
      }
    }
    break;
    case H_PRED:
    {
      /* Every row is filled with its left neighbour. */
      for (i = 0; i < 16; ++i) {
        const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]);

        yleft += left_stride;
        vst1q_u8(ypred_ptr, v_yleft);
        ypred_ptr += y_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* pred = clamp(left + above - top_left, 0, 255).
       * The pixels are unsigned, so they are broadcast with unsigned casts;
       * the signed reinterpretation happens only for the subtraction. */
      const uint16x8_t v_ytop_left = vmovq_n_u16((uint16_t)yabove_row[-1]);
      const uint8x16_t v_above = vld1q_u8(yabove_row);

      for (i = 0; i < 16; ++i) {
        const uint8x8_t v_yleft = vmov_n_u8((uint8_t)yleft[0]);
        const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft);
        const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft);
        const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo),
                                         vreinterpretq_s16_u16(v_ytop_left));
        const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi),
                                         vreinterpretq_s16_u16(v_ytop_left));
        /* Saturating narrow performs the clamp to [0, 255]. */
        const uint8x8_t pred_lo = vqmovun_s16(b_lo);
        const uint8x8_t pred_hi = vqmovun_s16(b_hi);

        vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi));
        ypred_ptr += y_stride;
        yleft += left_stride;
      }
    }
    break;
  }
}
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,

View File

@ -22,16 +22,6 @@ static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
SD4(out, out, out, out, dst, dst_stride);
}
/* Vertical prediction for a 16x16 block: the 16-byte vector loaded from src
 * is stored to the upper and the lower half of dst through two ST_UB8 calls,
 * the second one starting eight rows further down. */
static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride)
{
    v16u8 above = LD_UB(src);
    uint8_t *lower_half = dst + (8 * dst_stride);

    ST_UB8(above, above, above, above, above, above, above, above,
           dst, dst_stride);
    ST_UB8(above, above, above, above, above, above, above, above,
           lower_half, dst_stride);
}
static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride)
{
@ -51,34 +41,6 @@ static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
SD4(out4, out5, out6, out7, dst, dst_stride);
}
/* Horizontal prediction for a 16x16 block, processed four rows at a time:
 * each of the four left-column pixels (spaced src_stride apart) is broadcast
 * to a full vector and the four vectors are stored with ST_UB4. */
static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride,
                                          uint8_t *dst, int32_t dst_stride)
{
    uint32_t group;

    for (group = 0; group < 4; ++group)
    {
        uint8_t left0, left1, left2, left3;
        v16u8 row0, row1, row2, row3;

        left0 = src[0];
        left1 = src[src_stride];
        left2 = src[2 * src_stride];
        left3 = src[3 * src_stride];
        src += (4 * src_stride);

        row0 = (v16u8)__msa_fill_b(left0);
        row1 = (v16u8)__msa_fill_b(left1);
        row2 = (v16u8)__msa_fill_b(left2);
        row3 = (v16u8)__msa_fill_b(left3);

        ST_UB4(row0, row1, row2, row3, dst, dst_stride);
        dst += (4 * dst_stride);
    }
}
static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
int32_t src_stride_left,
uint8_t *dst, int32_t dst_stride,
@ -140,128 +102,6 @@ static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
SD4(out, out, out, out, dst, dst_stride);
}
static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left,
int32_t src_stride_left,
uint8_t *dst, int32_t dst_stride,
uint8_t is_above, uint8_t is_left)
{
uint32_t row;
uint32_t addition = 0;
v16u8 src_above, out;
v8u16 sum_above;
v4u32 sum_top;
v2u64 sum;
if (is_left && is_above)
{
src_above = LD_UB(src_top);
sum_above = __msa_hadd_u_h(src_above, src_above);
sum_top = __msa_hadd_u_w(sum_above, sum_above);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum);
sum = __msa_hadd_u_d(sum_top, sum_top);
addition = __msa_copy_u_w((v4i32)sum, 0);
for (row = 0; row < 16; ++row)
{
addition += src_left[row * src_stride_left];
}
addition = (addition + 16) >> 5;
out = (v16u8)__msa_fill_b(addition);
}
else if (is_left)
{
for (row = 0; row < 16; ++row)
{
addition += src_left[row * src_stride_left];
}
addition = (addition + 8) >> 4;
out = (v16u8)__msa_fill_b(addition);
}
else if (is_above)
{
src_above = LD_UB(src_top);
sum_above = __msa_hadd_u_h(src_above, src_above);
sum_top = __msa_hadd_u_w(sum_above, sum_above);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum);
sum = __msa_hadd_u_d(sum_top, sum_top);
sum = (v2u64)__msa_srari_d((v2i64)sum, 4);
out = (v16u8)__msa_splati_b((v16i8)sum, 0);
}
else
{
out = (v16u8)__msa_ldi_b(128);
}
ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
dst += (8 * dst_stride);
ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}
/* MSA 16x16 luma intra predictor.
 *
 * Dispatches on the macroblock's prediction mode: DC, vertical and horizontal
 * prediction go to the MSA helpers, TM prediction is computed with scalar
 * code, and every other mode leaves ypred_ptr untouched. */
void vp8_build_intra_predictors_mby_s_msa(struct macroblockd *x,
                                          unsigned char *yabove_row,
                                          unsigned char *yleft,
                                          int left_stride,
                                          unsigned char *ypred_ptr,
                                          int y_stride)
{
    uint32_t r, c;
    /* Top-left neighbour; it is read for every mode but used by TM_PRED only. */
    uint8_t top_left = yabove_row[-1];

    switch (x->mode_info_context->mbmi.mode)
    {
        case DC_PRED:
            intra_predict_dc_16x16_msa(yabove_row, yleft, left_stride,
                                       ypred_ptr, y_stride,
                                       x->up_available, x->left_available);
            break;

        case V_PRED:
            intra_predict_vert_16x16_msa(yabove_row, ypred_ptr, y_stride);
            break;

        case H_PRED:
            intra_predict_horiz_16x16_msa(yleft, left_stride, ypred_ptr,
                                          y_stride);
            break;

        case TM_PRED:
            /* pred = clamp(left + above - top_left, 0, 255) */
            for (r = 0; r < 16; ++r)
            {
                for (c = 0; c < 16; ++c)
                {
                    int raw = yleft[r * left_stride] + yabove_row[c] -
                              top_left;

                    ypred_ptr[c] = (raw < 0) ? 0 : ((raw > 255) ? 255 : raw);
                }

                ypred_ptr += y_stride;
            }
            break;

        /* Modes that are not 16x16 intra modes: nothing to predict here. */
        case B_PRED:
        case NEARESTMV:
        case NEARMV:
        case ZEROMV:
        case NEWMV:
        case SPLITMV:
        case MB_MODE_COUNT:
            break;
    }
}
void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x,
unsigned char *uabove_row,
unsigned char *vabove_row,

View File

@ -9,132 +9,56 @@
*/
#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/vpx_once.h"
#include "blockd.h"
#include "vp8/common/reconintra.h"
void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left);
static intra_pred_fn pred[4];
static intra_pred_fn dc_pred[2][2];
/* Fills the 16x16 luma dispatch tables with the vpx_dsp predictors.
 * pred[] is indexed by prediction mode; only the V, H and TM slots are
 * assigned here. dc_pred[][] holds the four DC variants. */
static void vp8_init_intra_predictors_internal(void)
{
    /* DC variants: 128 fill, top only, left only, top and left. */
    dc_pred[0][0] = vpx_dc_128_predictor_16x16;
    dc_pred[0][1] = vpx_dc_top_predictor_16x16;
    dc_pred[1][0] = vpx_dc_left_predictor_16x16;
    dc_pred[1][1] = vpx_dc_predictor_16x16;

    /* Directional and TrueMotion predictors. */
    pred[TM_PRED] = vpx_tm_predictor_16x16;
    pred[H_PRED] = vpx_h_predictor_16x16;
    pred[V_PRED] = vpx_v_predictor_16x16;
}
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char * yabove_row,
unsigned char * yleft,
int left_stride,
unsigned char * ypred_ptr,
int y_stride)
{
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
unsigned char yleft_col[16];
unsigned char ytop_left = yabove_row[-1];
int r, c, i;
int i;
for (i = 0; i < 16; i++)
{
yleft_col[i] = yleft[i* left_stride];
}
/* for Y */
switch (x->mode_info_context->mbmi.mode)
if (mode == DC_PRED)
{
case DC_PRED:
{
int expected_dc;
int shift;
int average = 0;
if (x->up_available || x->left_available)
{
if (x->up_available)
{
for (i = 0; i < 16; i++)
{
average += yabove_row[i];
}
}
if (x->left_available)
{
for (i = 0; i < 16; i++)
{
average += yleft_col[i];
}
}
shift = 3 + x->up_available + x->left_available;
expected_dc = (average + (1 << (shift - 1))) >> shift;
dc_pred[x->left_available][x->up_available](ypred_ptr, y_stride,
yabove_row, yleft_col);
}
else
{
expected_dc = 128;
}
/*memset(ypred_ptr, expected_dc, 256);*/
for (r = 0; r < 16; r++)
{
memset(ypred_ptr, expected_dc, 16);
ypred_ptr += y_stride;
}
}
break;
case V_PRED:
{
for (r = 0; r < 16; r++)
{
((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
ypred_ptr += y_stride;
}
}
break;
case H_PRED:
{
for (r = 0; r < 16; r++)
{
memset(ypred_ptr, yleft_col[r], 16);
ypred_ptr += y_stride;
}
}
break;
case TM_PRED:
{
for (r = 0; r < 16; r++)
{
for (c = 0; c < 16; c++)
{
int pred = yleft_col[r] + yabove_row[ c] - ytop_left;
if (pred < 0)
pred = 0;
if (pred > 255)
pred = 255;
ypred_ptr[c] = pred;
}
ypred_ptr += y_stride;
}
}
break;
case B_PRED:
case NEARESTMV:
case NEARMV:
case ZEROMV:
case NEWMV:
case SPLITMV:
case MB_MODE_COUNT:
break;
pred[mode](ypred_ptr, y_stride, yabove_row, yleft_col);
}
}
@ -278,3 +202,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
break;
}
}
/* Public initializer for the intra predictor dispatch tables; the actual
 * table setup is handed to once() so that it is performed through the
 * one-time-initialization helper. */
void vp8_init_intra_predictors(void) {
    once(vp8_init_intra_predictors_internal);
}

34
vp8/common/reconintra.h Normal file
View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP8_COMMON_RECONINTRA_H_
#define VP8_COMMON_RECONINTRA_H_
#include "vp8/common/blockd.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Builds the 16x16 luma intra prediction for macroblock |x| into |ypred_ptr|
 * (rows |y_stride| bytes apart), using the row above (|yabove_row|) and the
 * left column (|yleft|, pixels |left_stride| bytes apart). The prediction
 * mode is taken from x->mode_info_context->mbmi.mode.
 * vp8_init_intra_predictors() must have been called beforehand. */
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
unsigned char *yabove_row,
unsigned char *yleft,
int left_stride,
unsigned char *ypred_ptr,
int y_stride);
/* Sets up the predictor function tables used by
 * vp8_build_intra_predictors_mby_s(). */
void vp8_init_intra_predictors(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP8_COMMON_RECONINTRA_H_

View File

@ -152,9 +152,6 @@ specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/;
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3 neon msa/;
add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/;

View File

@ -593,520 +593,13 @@ sym(vp8_intra_pred_uv_ho_%1):
vp8_intra_pred_uv_ho mmx2
vp8_intra_pred_uv_ho ssse3
; DC prediction using both edges: fills the 16x16 block at dst with
; (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5.
; above and the dst rows are accessed with movdqa (aligned 16-byte accesses).
;void vp8_intra_pred_y_dc_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
global sym(vp8_intra_pred_y_dc_sse2) PRIVATE
sym(vp8_intra_pred_y_dc_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top
mov rdi, arg(2) ;above
mov rsi, arg(3) ;left
movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
movdqa xmm1, [rdi]
; psadbw against zero yields the byte sums of each 8-byte half
psadbw xmm1, xmm0
movq xmm2, xmm1
punpckhqdq xmm1, xmm1
; low word of xmm1 = sum of all 16 bytes above
paddw xmm1, xmm2
; from left
; rdi = 3 * left_stride; ecx accumulates the 16 left pixels, 4 per group
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
; add up
; edx = (top sum + left sum + 16) >> 5
pextrw edx, xmm1, 0x0
lea edx, [edx+ecx+16]
sar edx, 5
movd xmm1, edx
; FIXME use pshufb for ssse3 version
; broadcast the DC value: word 0 -> 4 words -> 8 words -> 16 bytes
pshuflw xmm1, xmm1, 0x0
punpcklqdq xmm1, xmm1
packuswb xmm1, xmm1
; write out
; two passes of eight rows each; rax = 3 * dst_stride
mov rsi, 2
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
.label
movdqa [rdi ], xmm1
movdqa [rdi+rcx ], xmm1
movdqa [rdi+rcx*2], xmm1
movdqa [rdi+rax ], xmm1
lea rdi, [rdi+rcx*4]
movdqa [rdi ], xmm1
movdqa [rdi+rcx ], xmm1
movdqa [rdi+rcx*2], xmm1
movdqa [rdi+rax ], xmm1
lea rdi, [rdi+rcx*4]
dec rsi
jnz .label
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
; DC prediction from the top edge only: fills the 16x16 block at dst with
; (sum(above[0..15]) + 8) >> 4. left and left_stride are ignored.
;void vp8_intra_pred_y_dctop_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE
sym(vp8_intra_pred_y_dctop_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
GET_GOT rbx
; end prolog
;arg(3), arg(4) not used
; from top
mov rcx, arg(2) ;above;
pxor xmm0, xmm0
movdqa xmm1, [rcx]
; psadbw against zero yields the byte sums of each 8-byte half
psadbw xmm1, xmm0
movdqa xmm2, xmm1
punpckhqdq xmm1, xmm1
paddw xmm1, xmm2
; add up
; add the rounding constant 8 (dc_8) and divide by 16
paddw xmm1, [GLOBAL(dc_8)]
psraw xmm1, 4
; FIXME use pshufb for ssse3 version
; broadcast the DC value to all 16 bytes
pshuflw xmm1, xmm1, 0x0
punpcklqdq xmm1, xmm1
packuswb xmm1, xmm1
; write out
; two passes of eight rows each; rax = 3 * dst_stride
mov rsi, 2
mov rdx, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
.label
movdqa [rdx ], xmm1
movdqa [rdx+rcx ], xmm1
movdqa [rdx+rcx*2], xmm1
movdqa [rdx+rax ], xmm1
lea rdx, [rdx+rcx*4]
movdqa [rdx ], xmm1
movdqa [rdx+rcx ], xmm1
movdqa [rdx+rcx*2], xmm1
movdqa [rdx+rax ], xmm1
lea rdx, [rdx+rcx*4]
dec rsi
jnz .label
; begin epilog
RESTORE_GOT
pop rsi
UNSHADOW_ARGS
pop rbp
ret
; DC prediction from the left edge only: fills the 16x16 block at dst with
; (sum(left[0..15]) + 8) >> 4. above is ignored.
;void vp8_intra_pred_y_dcleft_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE
sym(vp8_intra_pred_y_dcleft_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
;arg(2) not used
; from left
; rdi = 3 * left_stride; ecx accumulates the left pixels, 4 per group
mov rsi, arg(3) ;left;
movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
; the last pixel and the rounding constant 8 are folded into one lea
lea edx, [ecx+edx+8]
; add up
shr edx, 4
movd xmm1, edx
; FIXME use pshufb for ssse3 version
; broadcast the DC value to all 16 bytes
pshuflw xmm1, xmm1, 0x0
punpcklqdq xmm1, xmm1
packuswb xmm1, xmm1
; write out
; two passes of eight rows each; rax = 3 * dst_stride
mov rsi, 2
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
.label
movdqa [rdi ], xmm1
movdqa [rdi+rcx ], xmm1
movdqa [rdi+rcx*2], xmm1
movdqa [rdi+rax ], xmm1
lea rdi, [rdi+rcx*4]
movdqa [rdi ], xmm1
movdqa [rdi+rcx ], xmm1
movdqa [rdi+rcx*2], xmm1
movdqa [rdi+rax ], xmm1
lea rdi, [rdi+rcx*4]
dec rsi
jnz .label
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
; DC prediction with no edge available: fills the 16x16 block at dst with the
; constant vector loaded from dc_128. above, left and left_stride are ignored.
;void vp8_intra_pred_y_dc128_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE
sym(vp8_intra_pred_y_dc128_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
GET_GOT rbx
; end prolog
;arg(2), arg(3), arg(4) not used
; write out
; two passes of eight rows each; rcx = 3 * dst_stride
mov rsi, 2
movdqa xmm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
movsxd rdx, dword ptr arg(1) ;dst_stride
lea rcx, [rdx*3]
.label
movdqa [rax ], xmm1
movdqa [rax+rdx ], xmm1
movdqa [rax+rdx*2], xmm1
movdqa [rax+rcx ], xmm1
lea rax, [rax+rdx*4]
movdqa [rax ], xmm1
movdqa [rax+rdx ], xmm1
movdqa [rax+rdx*2], xmm1
movdqa [rax+rcx ], xmm1
lea rax, [rax+rdx*4]
dec rsi
jnz .label
; begin epilog
RESTORE_GOT
pop rsi
UNSHADOW_ARGS
pop rbp
ret
; TrueMotion prediction for a 16x16 block:
;   dst[r][c] = clamp(left[r] + above[c] - above[-1], 0, 255)
; The macro is instantiated once for sse2 and once for ssse3; the two
; variants differ only in how a single byte is broadcast to eight words.
;void vp8_intra_pred_y_tm_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
%macro vp8_intra_pred_y_tm 1
global sym(vp8_intra_pred_y_tm_%1) PRIVATE
sym(vp8_intra_pred_y_tm_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM 7
push rsi
push rdi
push rbx
GET_GOT rbx
; end prolog
; read top row
; edx = loop counter: 8 iterations, two rows per iteration
mov edx, 8
mov rsi, arg(2) ;above
movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
; pshufb control (words of 0x0400) used to broadcast byte 0 into every word
movdqa xmm3, [GLOBAL(dc_1024)]
%endif
; xmm1 / xmm2 = above[0..7] / above[8..15] widened to words
movdqa xmm1, [rsi]
movdqa xmm2, xmm1
punpcklbw xmm1, xmm0
punpckhbw xmm2, xmm0
; set up left ptrs and subtract topleft
movd xmm4, [rsi-1]
mov rsi, arg(3) ;left
%ifidn %1, sse2
punpcklbw xmm4, xmm0
pshuflw xmm4, xmm4, 0x0
punpcklqdq xmm4, xmm4
%else
pshufb xmm4, xmm3
%endif
; xmm1 / xmm2 now hold (above - topleft) as words
psubw xmm1, xmm4
psubw xmm2, xmm4
; set up dest ptrs
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_tm_%1_loop:
; load the left pixels of two consecutive rows
mov bl, [rsi]
movd xmm4, ebx
mov bl, [rsi+rax]
movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm4, xmm0
punpcklbw xmm5, xmm0
pshuflw xmm4, xmm4, 0x0
pshuflw xmm5, xmm5, 0x0
punpcklqdq xmm4, xmm4
punpcklqdq xmm5, xmm5
%else
pshufb xmm4, xmm3
pshufb xmm5, xmm3
%endif
; left + (above - topleft) for both halves of both rows
movdqa xmm6, xmm4
movdqa xmm7, xmm5
paddw xmm4, xmm1
paddw xmm6, xmm2
paddw xmm5, xmm1
paddw xmm7, xmm2
; packuswb saturates to [0, 255], which performs the clamp
packuswb xmm4, xmm6
packuswb xmm5, xmm7
movdqa [rdi ], xmm4
movdqa [rdi+rcx], xmm5
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
jnz vp8_intra_pred_y_tm_%1_loop
; begin epilog
RESTORE_GOT
pop rbx
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
%endmacro
vp8_intra_pred_y_tm sse2
vp8_intra_pred_y_tm ssse3
; Vertical prediction: copies the 16 bytes at above into each of the 16 rows
; of dst. left and left_stride are ignored.
;void vp8_intra_pred_y_ve_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
global sym(vp8_intra_pred_y_ve_sse2) PRIVATE
sym(vp8_intra_pred_y_ve_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
; end prolog
;arg(3), arg(4) not used
mov rax, arg(2) ;above;
; rsi = pass counter: two passes of eight rows each
mov rsi, 2
movsxd rdx, dword ptr arg(1) ;dst_stride
; read from top
movdqa xmm1, [rax]
; write out
; rcx = 3 * dst_stride
mov rax, arg(0) ;dst;
lea rcx, [rdx*3]
.label
movdqa [rax ], xmm1
movdqa [rax+rdx ], xmm1
movdqa [rax+rdx*2], xmm1
movdqa [rax+rcx ], xmm1
lea rax, [rax+rdx*4]
movdqa [rax ], xmm1
movdqa [rax+rdx ], xmm1
movdqa [rax+rdx*2], xmm1
movdqa [rax+rcx ], xmm1
lea rax, [rax+rdx*4]
dec rsi
jnz .label
; begin epilog
pop rsi
UNSHADOW_ARGS
pop rbp
ret
; Horizontal prediction: fills each of the 16 rows of dst with that row's
; left pixel. above is ignored.
;void vp8_intra_pred_y_ho_sse2(
; unsigned char *dst,
; int dst_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride
; )
global sym(vp8_intra_pred_y_ho_sse2) PRIVATE
sym(vp8_intra_pred_y_ho_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
push rbx
; end prolog
;arg(2) not used
; read from left and write out
; edx = loop counter: 8 iterations, two rows per iteration
mov edx, 8
mov rsi, arg(3) ;left;
movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_ho_sse2_loop:
; only bl is loaded; the broadcast below uses just the low byte of ebx
mov bl, [rsi]
movd xmm0, ebx
mov bl, [rsi+rax]
movd xmm1, ebx
; FIXME use pshufb for ssse3 version
; byte 0 -> word 0 (both bytes) -> 4 words -> 16 bytes
punpcklbw xmm0, xmm0
punpcklbw xmm1, xmm1
pshuflw xmm0, xmm0, 0x0
pshuflw xmm1, xmm1, 0x0
punpcklqdq xmm0, xmm0
punpcklqdq xmm1, xmm1
movdqa [rdi ], xmm0
movdqa [rdi+rcx], xmm1
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
jnz vp8_intra_pred_y_ho_sse2_loop
; begin epilog
pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
dc_128:
times 16 db 128
times 8 db 128
dc_4:
times 4 dw 4
align 16
dc_8:
times 8 dw 8
align 16
dc_1024:
times 8 dw 0x400
align 16

View File

@ -109,78 +109,3 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
/* Common signature of the assembly 16x16 luma predictors: destination and its
 * stride, the row above, and the left column with its stride. */
#define build_intra_predictors_mby_prototype(sym) \
    void sym(unsigned char *dst, int dst_stride, \
             const unsigned char *above, \
             const unsigned char *left, int left_stride)
typedef build_intra_predictors_mby_prototype((*build_intra_predictors_mby_fn_t));

extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dctop_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dcleft_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc128_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ho_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ve_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_sse2);
extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_ssse3);

/* Selects the assembly predictor matching the macroblock's luma mode and
 * runs it.
 *
 * x          - macroblock descriptor (mode and edge availability flags).
 * yabove_row - row above the block.
 * dst_y      - destination of the prediction, rows dst_stride bytes apart.
 * yleft      - left column, pixels left_stride bytes apart.
 * tm_func    - TM_PRED implementation to use (SSE2 or SSSE3 flavour).
 *
 * Modes other than V_PRED, H_PRED, TM_PRED and DC_PRED return without
 * writing anything. */
static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
                                               unsigned char * yabove_row,
                                               unsigned char *dst_y,
                                               int dst_stride,
                                               unsigned char * yleft,
                                               int left_stride,
                                               build_intra_predictors_mby_fn_t tm_func)
{
    int mode = x->mode_info_context->mbmi.mode;
    /* Luma function pointer type, matching tm_func and the externs above. */
    build_intra_predictors_mby_fn_t fn;

    switch (mode) {
        case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break;
        case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break;
        case TM_PRED: fn = tm_func; break;
        case DC_PRED:
            /* The DC variant depends on which neighbouring edges exist. */
            if (x->up_available) {
                fn = x->left_available ? vp8_intra_pred_y_dc_sse2
                                       : vp8_intra_pred_y_dctop_sse2;
            } else {
                fn = x->left_available ? vp8_intra_pred_y_dcleft_sse2
                                       : vp8_intra_pred_y_dc128_sse2;
            }
            break;
        default: return;
    }

    fn(dst_y, dst_stride, yabove_row, yleft, left_stride);
}
/* SSE2 entry point for 16x16 luma intra prediction: forwards to the shared
 * x86 dispatcher with the SSE2 TrueMotion routine. Note that the dispatcher
 * takes the destination before the left column. */
void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x,
                                           unsigned char * yabove_row,
                                           unsigned char * yleft,
                                           int left_stride,
                                           unsigned char * ypred_ptr,
                                           int y_stride)
{
    vp8_build_intra_predictors_mby_x86(x,
                                       yabove_row,
                                       ypred_ptr, y_stride,
                                       yleft, left_stride,
                                       vp8_intra_pred_y_tm_sse2);
}
/* SSSE3 entry point for 16x16 luma intra prediction: forwards to the shared
 * x86 dispatcher with the SSSE3 TrueMotion routine. Note that the dispatcher
 * takes the destination before the left column. */
void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x,
                                            unsigned char * yabove_row,
                                            unsigned char * yleft,
                                            int left_stride,
                                            unsigned char * ypred_ptr,
                                            int y_stride)
{
    vp8_build_intra_predictors_mby_x86(x,
                                       yabove_row,
                                       ypred_ptr, y_stride,
                                       yleft, left_stride,
                                       vp8_intra_pred_y_tm_ssse3);
}

View File

@ -23,6 +23,7 @@
#include "vp8/common/entropymode.h"
#include "vp8/common/quant_common.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#include "decodemv.h"

View File

@ -25,9 +25,12 @@
#include <assert.h>
#include "vp8/common/quant_common.h"
#include "vp8/common/reconintra.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/systemdependent.h"
#include "vpx_ports/vpx_once.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#if CONFIG_ERROR_CONCEALMENT
@ -42,6 +45,17 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
static int get_free_fb (VP8_COMMON *cm);
static void ref_cnt_fb (int *buf, int *idx, int new_idx);
/* Decoder-side global setup: initializes the vpx_dsp run-time dispatch and
 * the intra predictor tables. A static flag turns repeated calls into
 * no-ops. */
static void initialize_dec(void) {
    static volatile int init_done = 0;

    if (init_done)
        return;

    vpx_dsp_rtcd();
    vp8_init_intra_predictors();
    init_done = 1;
}
static void remove_decompressor(VP8D_COMP *pbi)
{
#if CONFIG_ERROR_CONCEALMENT
@ -105,6 +119,8 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
vp8_setup_block_dptrs(&pbi->mb);
once(initialize_dec);
return pbi;
}

View File

@ -24,6 +24,7 @@
#include "detokenize.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"

View File

@ -13,6 +13,7 @@
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/encoder/quantize.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
#include "vp8/common/invtrans.h"

View File

@ -31,6 +31,7 @@
#include "vp8/common/postproc.h"
#endif
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"
#include "vpx_ports/vpx_timer.h"
@ -422,6 +423,16 @@ static void setup_features(VP8_COMP *cpi)
static void dealloc_raw_frame_buffers(VP8_COMP *cpi);
/* Encoder-side global setup: initializes the vpx_dsp run-time dispatch and
 * the intra predictor tables. A static flag turns repeated calls into
 * no-ops. */
void vp8_initialize_enc(void)
{
    static volatile int init_done = 0;

    if (init_done)
        return;

    vpx_dsp_rtcd();
    vp8_init_intra_predictors();
    init_done = 1;
}
static void dealloc_compressor_data(VP8_COMP *cpi)
{

View File

@ -716,6 +716,8 @@ typedef struct VP8_COMP
} rd_costs;
} VP8_COMP;
void vp8_initialize_enc(void);
void vp8_alloc_compressor_data(VP8_COMP *cpi);
int vp8_reverse_trans(int x);
void vp8_new_framerate(VP8_COMP *cpi, double framerate);

View File

@ -21,6 +21,7 @@
#include "vp8/common/findnearmv.h"
#include "encodemb.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "vpx_dsp/variance.h"
#include "mcomp.h"

View File

@ -24,6 +24,7 @@
#include "pickinter.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/quant_common.h"

View File

@ -45,6 +45,7 @@ VP8_COMMON_SRCS-yes += common/mv.h
VP8_COMMON_SRCS-yes += common/onyxc_int.h
VP8_COMMON_SRCS-yes += common/quant_common.h
VP8_COMMON_SRCS-yes += common/reconinter.h
VP8_COMMON_SRCS-yes += common/reconintra.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
VP8_COMMON_SRCS-yes += common/rtcd.c
VP8_COMMON_SRCS-yes += common/rtcd_defs.pl

View File

@ -17,6 +17,7 @@
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/vpx_once.h"
#include "vp8/encoder/onyx_int.h"
#include "vpx/vp8cx.h"
#include "vp8/encoder/firstpass.h"
@ -693,6 +694,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx,
else
ctx->priv->enc.total_encoders = 1;
once(vp8_initialize_enc);
res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0);
if (!res)

View File

@ -36,7 +36,6 @@ DSP_SRCS-yes += bitreader_buffer.h
endif
# intra predictions
ifneq ($(filter yes,$(CONFIG_VP9) $(CONFIG_VP10)),)
DSP_SRCS-yes += intrapred.c
ifeq ($(CONFIG_USE_X86INC),yes)
@ -59,7 +58,6 @@ DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c
DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred4_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred8_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c
endif # CONFIG_VP9 || CONFIG_VP10
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c

View File

@ -54,165 +54,164 @@ if ($opts{arch} eq "x86_64") {
# Intra prediction
#
if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
# 4x4 intra predictor registrations.
# Each add_proto call gives a return type, a function name and a C parameter
# list (dst, y_stride, above, left); the specialize call that follows names
# the same function plus optional variant tags (neon, dspr2, msa) and, for
# most entries, an interpolated variable such as $ssse3_x86inc or $sse_x86inc.
# Those variables are defined outside this excerpt.
# NOTE(review): every add_proto/specialize pair appears twice in a row here;
# this looks like the old and new sides of a diff hunk whose +/- markers were
# lost -- confirm before treating this text as a runnable script.
add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_4x4/;
add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_4x4/;
add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_4x4 neon/;
add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_4x4 neon/;
add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
# 8x8 intra predictor registrations.
# Each add_proto call gives a return type, a function name and a C parameter
# list (dst, y_stride, above, left); the specialize call that follows names
# the same function plus optional variant tags (neon, dspr2, msa) and, for
# most entries, an interpolated variable ($ssse3_x86inc, $sse_x86inc or
# $sse2_x86inc). Those variables are defined outside this excerpt.
# NOTE(review): every add_proto/specialize pair appears twice in a row here;
# this looks like the old and new sides of a diff hunk whose +/- markers were
# lost -- confirm before treating this text as a runnable script.
add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_8x8/;
add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_8x8/;
add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_8x8/;
add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_8x8/;
add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
# 16x16 intra predictor registrations.
# Each add_proto call gives a return type, a function name and a C parameter
# list (dst, y_stride, above, left); the specialize call that follows names
# the same function plus optional variant tags (neon, dspr2, msa) and, for
# most entries, an interpolated variable ($ssse3_x86inc or $sse2_x86inc).
# Those variables are defined outside this excerpt.
# NOTE(review): every add_proto/specialize pair appears twice in a row here;
# this looks like the old and new sides of a diff hunk whose +/- markers were
# lost -- confirm before treating this text as a runnable script.
add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_16x16/;
add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_16x16/;
add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_16x16/;
add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_16x16/;
add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
# 32x32 intra predictor registrations.
# Each add_proto call gives a return type, a function name and a C parameter
# list (dst, y_stride, above, left); the specialize call that follows names
# the same function plus optional variant tags (neon, msa) and, for most
# entries, an interpolated variable ($ssse3_x86inc, $sse2_x86inc or, for the
# tm predictor only, $sse2_x86_64_x86inc). Those variables are defined outside
# this excerpt.
# NOTE(review): every add_proto/specialize pair appears twice in a row here;
# this looks like the old and new sides of a diff hunk whose +/- markers were
# lost -- confirm before treating this text as a runnable script.
add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_32x32/;
add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_32x32/;
add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_32x32/;
add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_32x32/;
add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d207_predictor_4x4/;
@ -368,8 +367,7 @@ if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes"))
add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_128_predictor_32x32/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9 || CONFIG_VP10
} # CONFIG_VP9_HIGHBITDEPTH
#
# Sub Pixel Filters