From 54d48955f6b80de5d58e8da52dba97813296b713 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 30 Sep 2015 11:07:35 -0400 Subject: [PATCH] vp8: change build_intra_predictors_mby_s to use vpx_dsp. Change-Id: I2000820e0c04de2c975d370a0cf7145330289bb2 --- test/intrapred_test.cc | 89 ----- vp8/common/arm/neon/reconintra_neon.c | 86 ----- vp8/common/mips/msa/reconintra_msa.c | 160 -------- vp8/common/reconintra.c | 153 +++----- vp8/common/reconintra.h | 34 ++ vp8/common/rtcd_defs.pl | 3 - vp8/common/x86/recon_sse2.asm | 509 +------------------------- vp8/common/x86/recon_wrapper_sse2.c | 75 ---- vp8/decoder/decodeframe.c | 1 + vp8/decoder/onyxd_if.c | 16 + vp8/decoder/threading.c | 1 + vp8/encoder/encodeintra.c | 1 + vp8/encoder/onyx_if.c | 11 + vp8/encoder/onyx_int.h | 2 + vp8/encoder/pickinter.c | 1 + vp8/encoder/rdopt.c | 1 + vp8/vp8_common.mk | 1 + vp8/vp8_cx_iface.c | 3 + vpx_dsp/vpx_dsp.mk | 2 - vpx_dsp/vpx_dsp_rtcd_defs.pl | 422 +++++++++++---------- 20 files changed, 324 insertions(+), 1247 deletions(-) create mode 100644 vp8/common/reconintra.h diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc index 65a069748..697d2fcac 100644 --- a/test/intrapred_test.cc +++ b/test/intrapred_test.cc @@ -216,95 +216,6 @@ class IntraPredBase { int num_planes_; }; -typedef void (*IntraPredYFunc)(MACROBLOCKD *x, - uint8_t *yabove_row, - uint8_t *yleft, - int left_stride, - uint8_t *ypred_ptr, - int y_stride); - -class IntraPredYTest - : public IntraPredBase, - public ::testing::TestWithParam { - public: - static void SetUpTestCase() { - mb_ = reinterpret_cast( - vpx_memalign(32, sizeof(MACROBLOCKD))); - mi_ = reinterpret_cast( - vpx_memalign(32, sizeof(MODE_INFO))); - data_array_ = reinterpret_cast( - vpx_memalign(kDataAlignment, kDataBufferSize)); - } - - static void TearDownTestCase() { - vpx_free(data_array_); - vpx_free(mi_); - vpx_free(mb_); - data_array_ = NULL; - } - - protected: - static const int kBlockSize = 16; - static const int kDataAlignment = 16; - static const int kStride = kBlockSize * 3; - // We use 48 so that the data pointer of the first pixel in each row of - // each macroblock is 16-byte aligned, and this gives us access to the - // top-left and top-right corner pixels belonging to the top-left/right - // macroblocks. - // We use 17 lines so we have one line above us for top-prediction. - static const int kDataBufferSize = kStride * (kBlockSize + 1); - - virtual void SetUp() { - pred_fn_ = GetParam(); - SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1); - } - - virtual void Predict(MB_PREDICTION_MODE mode) { - mbptr_->mode_info_context->mbmi.mode = mode; - ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_, - data_ptr_[0] - kStride, - data_ptr_[0] - 1, kStride, - data_ptr_[0], kStride)); - } - - IntraPredYFunc pred_fn_; - static uint8_t* data_array_; - static MACROBLOCKD * mb_; - static MODE_INFO *mi_; -}; - -MACROBLOCKD* IntraPredYTest::mb_ = NULL; -MODE_INFO* IntraPredYTest::mi_ = NULL; -uint8_t* IntraPredYTest::data_array_ = NULL; - -TEST_P(IntraPredYTest, IntraPredTests) { - RunTest(); -} - -INSTANTIATE_TEST_CASE_P(C, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_c)); -#if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_sse2)); -#endif -#if HAVE_SSSE3 -INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_ssse3)); -#endif -#if HAVE_NEON -INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_neon)); -#endif -#if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, IntraPredYTest, - ::testing::Values( - vp8_build_intra_predictors_mby_s_msa)); -#endif - typedef void (*IntraPredUvFunc)(MACROBLOCKD *x, uint8_t *uabove_row, uint8_t *vabove_row, diff --git a/vp8/common/arm/neon/reconintra_neon.c b/vp8/common/arm/neon/reconintra_neon.c index af52cd5ea..5fc8f70e4 100644 --- a/vp8/common/arm/neon/reconintra_neon.c +++ b/vp8/common/arm/neon/reconintra_neon.c @@ -12,92 +12,6 @@ #include "vp8/common/blockd.h" -void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) { - const int mode = x->mode_info_context->mbmi.mode; - int i; - - switch (mode) { - case DC_PRED: - { - int shift = x->up_available + x->left_available; - uint8x16_t v_expected_dc = vdupq_n_u8(128); - - if (shift) { - unsigned int average = 0; - int expected_dc; - if (x->up_available) { - const uint8x16_t v_above = vld1q_u8(yabove_row); - const uint16x8_t a = vpaddlq_u8(v_above); - const uint32x4_t b = vpaddlq_u16(a); - const uint64x2_t c = vpaddlq_u32(b); - const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), - vreinterpret_u32_u64(vget_high_u64(c))); - average = vget_lane_u32(d, 0); - } - if (x->left_available) { - for (i = 0; i < 16; ++i) { - average += yleft[0]; - yleft += left_stride; - } - } - shift += 3; - expected_dc = (average + (1 << (shift - 1))) >> shift; - v_expected_dc = vmovq_n_u8((uint8_t)expected_dc); - } - for (i = 0; i < 16; ++i) { - vst1q_u8(ypred_ptr, v_expected_dc); - ypred_ptr += y_stride; - } - } - break; - case V_PRED: - { - const uint8x16_t v_above = vld1q_u8(yabove_row); - for (i = 0; i < 16; ++i) { - vst1q_u8(ypred_ptr, v_above); - ypred_ptr += y_stride; - } - } - break; - case H_PRED: - { - for (i = 0; i < 16; ++i) { - const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]); - yleft += left_stride; - vst1q_u8(ypred_ptr, v_yleft); - ypred_ptr += y_stride; - } - } - break; - case TM_PRED: - { - const uint16x8_t v_ytop_left = vmovq_n_u16((int16_t)yabove_row[-1]); - const uint8x16_t v_above = vld1q_u8(yabove_row); - for (i = 0; i < 16; ++i) { - const uint8x8_t v_yleft = vmov_n_u8((int8_t)yleft[0]); - const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft); - const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft); - const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo), - vreinterpretq_s16_u16(v_ytop_left)); - const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi), - vreinterpretq_s16_u16(v_ytop_left)); - const uint8x8_t pred_lo = vqmovun_s16(b_lo); - const uint8x8_t pred_hi = vqmovun_s16(b_hi); - - vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi)); - ypred_ptr += y_stride; - yleft += left_stride; - } - } - break; - } -} - void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x, unsigned char * uabove_row, unsigned char * vabove_row, diff --git a/vp8/common/mips/msa/reconintra_msa.c b/vp8/common/mips/msa/reconintra_msa.c index 57f705d25..9b3ac5f60 100644 --- a/vp8/common/mips/msa/reconintra_msa.c +++ b/vp8/common/mips/msa/reconintra_msa.c @@ -22,16 +22,6 @@ static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst, SD4(out, out, out, out, dst, dst_stride); } -static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst, - int32_t dst_stride) -{ - v16u8 out = LD_UB(src); - - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); - dst += (8 * dst_stride); - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); -} - static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { @@ -51,34 +41,6 @@ static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride, SD4(out4, out5, out6, out7, dst, dst_stride); } -static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride, - uint8_t *dst, int32_t dst_stride) -{ - uint32_t row; - uint8_t inp0, inp1, inp2, inp3; - v16u8 src0, src1, src2, src3; - - for (row = 4; row--;) - { - inp0 = src[0]; - src += src_stride; - inp1 = src[0]; - src += src_stride; - inp2 = src[0]; - src += src_stride; - inp3 = src[0]; - src += src_stride; - - src0 = (v16u8)__msa_fill_b(inp0); - src1 = (v16u8)__msa_fill_b(inp1); - src2 = (v16u8)__msa_fill_b(inp2); - src3 = (v16u8)__msa_fill_b(inp3); - - ST_UB4(src0, src1, src2, src3, dst, dst_stride); - dst += (4 * dst_stride); - } -} - static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left, int32_t src_stride_left, uint8_t *dst, int32_t dst_stride, @@ -140,128 +102,6 @@ static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left, SD4(out, out, out, out, dst, dst_stride); } -static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left, - int32_t src_stride_left, - uint8_t *dst, int32_t dst_stride, - uint8_t is_above, uint8_t is_left) -{ - uint32_t row; - uint32_t addition = 0; - v16u8 src_above, out; - v8u16 sum_above; - v4u32 sum_top; - v2u64 sum; - - if (is_left && is_above) - { - src_above = LD_UB(src_top); - - sum_above = __msa_hadd_u_h(src_above, src_above); - sum_top = __msa_hadd_u_w(sum_above, sum_above); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum); - sum = __msa_hadd_u_d(sum_top, sum_top); - addition = __msa_copy_u_w((v4i32)sum, 0); - - for (row = 0; row < 16; ++row) - { - addition += src_left[row * src_stride_left]; - } - - addition = (addition + 16) >> 5; - out = (v16u8)__msa_fill_b(addition); - } - else if (is_left) - { - for (row = 0; row < 16; ++row) - { - addition += src_left[row * src_stride_left]; - } - - addition = (addition + 8) >> 4; - out = (v16u8)__msa_fill_b(addition); - } - else if (is_above) - { - src_above = LD_UB(src_top); - - sum_above = __msa_hadd_u_h(src_above, src_above); - sum_top = __msa_hadd_u_w(sum_above, sum_above); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum); - sum = __msa_hadd_u_d(sum_top, sum_top); - sum = (v2u64)__msa_srari_d((v2i64)sum, 4); - out = (v16u8)__msa_splati_b((v16i8)sum, 0); - } - else - { - out = (v16u8)__msa_ldi_b(128); - } - - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); - dst += (8 * dst_stride); - ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); -} - -void vp8_build_intra_predictors_mby_s_msa(struct macroblockd *x, - unsigned char *yabove_row, - unsigned char *yleft, - int left_stride, - unsigned char *ypred_ptr, - int y_stride) -{ - uint32_t row, col; - uint8_t ytop_left = yabove_row[-1]; - - switch (x->mode_info_context->mbmi.mode) - { - case DC_PRED: - intra_predict_dc_16x16_msa(yabove_row, yleft, left_stride, - ypred_ptr, y_stride, - x->up_available, x->left_available); - break; - - case V_PRED: - intra_predict_vert_16x16_msa(yabove_row, ypred_ptr, y_stride); - break; - - case H_PRED: - intra_predict_horiz_16x16_msa(yleft, left_stride, ypred_ptr, - y_stride); - break; - - case TM_PRED: - for (row = 0; row < 16; ++row) - { - for (col = 0; col < 16; ++col) - { - int pred = yleft[row * left_stride] + yabove_row[col] - - ytop_left; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - ypred_ptr[col] = pred; - } - - ypred_ptr += y_stride; - } - break; - - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; - } -} - void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x, unsigned char *uabove_row, unsigned char *vabove_row, diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c index 0a6c51b35..13da4b6bb 100644 --- a/vp8/common/reconintra.c +++ b/vp8/common/reconintra.c @@ -9,132 +9,56 @@ */ -#include "vpx_config.h" -#include "vp8_rtcd.h" +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "./vp8_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_once.h" #include "blockd.h" +#include "vp8/common/reconintra.h" -void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[4]; +static intra_pred_fn dc_pred[2][2]; + +static void vp8_init_intra_predictors_internal(void) { + pred[V_PRED] = vpx_v_predictor_16x16; + pred[H_PRED] = vpx_h_predictor_16x16; + pred[TM_PRED] = vpx_tm_predictor_16x16; + + dc_pred[0][0] = vpx_dc_128_predictor_16x16; + dc_pred[0][1] = vpx_dc_top_predictor_16x16; + dc_pred[1][0] = vpx_dc_left_predictor_16x16; + dc_pred[1][1] = vpx_dc_predictor_16x16; +} + +void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, + unsigned char * yabove_row, + unsigned char * yleft, + int left_stride, + unsigned char * ypred_ptr, + int y_stride) +{ + MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode; unsigned char yleft_col[16]; - unsigned char ytop_left = yabove_row[-1]; - int r, c, i; + int i; for (i = 0; i < 16; i++) { yleft_col[i] = yleft[i* left_stride]; } - /* for Y */ - switch (x->mode_info_context->mbmi.mode) + if (mode == DC_PRED) { - case DC_PRED: - { - int expected_dc; - int shift; - int average = 0; - - - if (x->up_available || x->left_available) - { - if (x->up_available) - { - for (i = 0; i < 16; i++) - { - average += yabove_row[i]; - } - } - - if (x->left_available) - { - - for (i = 0; i < 16; i++) - { - average += yleft_col[i]; - } - - } - - - - shift = 3 + x->up_available + x->left_available; - expected_dc = (average + (1 << (shift - 1))) >> shift; - } - else - { - expected_dc = 128; - } - - /*memset(ypred_ptr, expected_dc, 256);*/ - for (r = 0; r < 16; r++) - { - memset(ypred_ptr, expected_dc, 16); - ypred_ptr += y_stride; - } + dc_pred[x->left_available][x->up_available](ypred_ptr, y_stride, + yabove_row, yleft_col); } - break; - case V_PRED: + else { - - for (r = 0; r < 16; r++) - { - - ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0]; - ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1]; - ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2]; - ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3]; - ypred_ptr += y_stride; - } - } - break; - case H_PRED: - { - - for (r = 0; r < 16; r++) - { - - memset(ypred_ptr, yleft_col[r], 16); - ypred_ptr += y_stride; - } - - } - break; - case TM_PRED: - { - - for (r = 0; r < 16; r++) - { - for (c = 0; c < 16; c++) - { - int pred = yleft_col[r] + yabove_row[ c] - ytop_left; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - ypred_ptr[c] = pred; - } - - ypred_ptr += y_stride; - } - - } - break; - case B_PRED: - case NEARESTMV: - case NEARMV: - case ZEROMV: - case NEWMV: - case SPLITMV: - case MB_MODE_COUNT: - break; + pred[mode](ypred_ptr, y_stride, yabove_row, yleft_col); } } @@ -278,3 +202,8 @@ void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x, break; } } + +void vp8_init_intra_predictors(void) +{ + once(vp8_init_intra_predictors_internal); +} diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h new file mode 100644 index 000000000..2488bfd60 --- /dev/null +++ b/vp8/common/reconintra.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP8_COMMON_RECONINTRA_H_ +#define VP8_COMMON_RECONINTRA_H_ + +#include "vp8/common/blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, + unsigned char *yabove_row, + unsigned char *yleft, + int left_stride, + unsigned char *ypred_ptr, + int y_stride); + +void vp8_init_intra_predictors(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP8_COMMON_RECONINTRA_H_ diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl index 7924ae750..a705f904d 100644 --- a/vp8/common/rtcd_defs.pl +++ b/vp8/common/rtcd_defs.pl @@ -152,9 +152,6 @@ specialize qw/vp8_copy_mem8x4 mmx media neon dspr2 msa/; $vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6; $vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2; -add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"; -specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3 neon msa/; - add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"; specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/; diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index 7141f8324..e9c5dc72b 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -593,520 +593,13 @@ sym(vp8_intra_pred_uv_ho_%1): vp8_intra_pred_uv_ho mmx2 vp8_intra_pred_uv_ho ssse3 -;void vp8_intra_pred_y_dc_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dc_sse2) PRIVATE -sym(vp8_intra_pred_y_dc_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ; from top - mov rdi, arg(2) ;above - mov rsi, arg(3) ;left - movsxd rax, dword ptr arg(4) ;left_stride; - - pxor xmm0, xmm0 - movdqa xmm1, [rdi] - psadbw xmm1, xmm0 - movq xmm2, xmm1 - punpckhqdq xmm1, xmm1 - paddw xmm1, xmm2 - - ; from left - lea rdi, [rax*3] - - movzx ecx, byte [rsi] - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - - ; add up - pextrw edx, xmm1, 0x0 - lea edx, [edx+ecx+16] - sar edx, 5 - movd xmm1, edx - ; FIXME use pshufb for ssse3 version - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm1, xmm1 - packuswb xmm1, xmm1 - - ; write out - mov rsi, 2 - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - -.label - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - dec rsi - jnz .label - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_dctop_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE -sym(vp8_intra_pred_y_dctop_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - GET_GOT rbx - ; end prolog - - ;arg(3), arg(4) not used - - ; from top - mov rcx, arg(2) ;above; - pxor xmm0, xmm0 - movdqa xmm1, [rcx] - psadbw xmm1, xmm0 - movdqa xmm2, xmm1 - punpckhqdq xmm1, xmm1 - paddw xmm1, xmm2 - - ; add up - paddw xmm1, [GLOBAL(dc_8)] - psraw xmm1, 4 - ; FIXME use pshufb for ssse3 version - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm1, xmm1 - packuswb xmm1, xmm1 - - ; write out - mov rsi, 2 - mov rdx, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - -.label - movdqa [rdx ], xmm1 - movdqa [rdx+rcx ], xmm1 - movdqa [rdx+rcx*2], xmm1 - movdqa [rdx+rax ], xmm1 - lea rdx, [rdx+rcx*4] - movdqa [rdx ], xmm1 - movdqa [rdx+rcx ], xmm1 - movdqa [rdx+rcx*2], xmm1 - movdqa [rdx+rax ], xmm1 - lea rdx, [rdx+rcx*4] - dec rsi - jnz .label - - ; begin epilog - RESTORE_GOT - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_dcleft_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE -sym(vp8_intra_pred_y_dcleft_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - ;arg(2) not used - - ; from left - mov rsi, arg(3) ;left; - movsxd rax, dword ptr arg(4) ;left_stride; - - lea rdi, [rax*3] - movzx ecx, byte [rsi] - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - add ecx, edx - lea rsi, [rsi+rax*4] - movzx edx, byte [rsi] - add ecx, edx - movzx edx, byte [rsi+rax] - add ecx, edx - movzx edx, byte [rsi+rax*2] - add ecx, edx - movzx edx, byte [rsi+rdi] - lea edx, [ecx+edx+8] - - ; add up - shr edx, 4 - movd xmm1, edx - ; FIXME use pshufb for ssse3 version - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm1, xmm1 - packuswb xmm1, xmm1 - - ; write out - mov rsi, 2 - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - lea rax, [rcx*3] - -.label - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - movdqa [rdi ], xmm1 - movdqa [rdi+rcx ], xmm1 - movdqa [rdi+rcx*2], xmm1 - movdqa [rdi+rax ], xmm1 - lea rdi, [rdi+rcx*4] - dec rsi - jnz .label - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_dc128_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE -sym(vp8_intra_pred_y_dc128_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - GET_GOT rbx - ; end prolog - - ;arg(2), arg(3), arg(4) not used - - ; write out - mov rsi, 2 - movdqa xmm1, [GLOBAL(dc_128)] - mov rax, arg(0) ;dst; - movsxd rdx, dword ptr arg(1) ;dst_stride - lea rcx, [rdx*3] - -.label - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - dec rsi - jnz .label - - ; begin epilog - RESTORE_GOT - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_tm_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -%macro vp8_intra_pred_y_tm 1 -global sym(vp8_intra_pred_y_tm_%1) PRIVATE -sym(vp8_intra_pred_y_tm_%1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rbx - GET_GOT rbx - ; end prolog - - ; read top row - mov edx, 8 - mov rsi, arg(2) ;above - movsxd rax, dword ptr arg(4) ;left_stride; - pxor xmm0, xmm0 -%ifidn %1, ssse3 - movdqa xmm3, [GLOBAL(dc_1024)] -%endif - movdqa xmm1, [rsi] - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm0 - punpckhbw xmm2, xmm0 - - ; set up left ptrs ans subtract topleft - movd xmm4, [rsi-1] - mov rsi, arg(3) ;left -%ifidn %1, sse2 - punpcklbw xmm4, xmm0 - pshuflw xmm4, xmm4, 0x0 - punpcklqdq xmm4, xmm4 -%else - pshufb xmm4, xmm3 -%endif - psubw xmm1, xmm4 - psubw xmm2, xmm4 - - ; set up dest ptrs - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride -vp8_intra_pred_y_tm_%1_loop: - mov bl, [rsi] - movd xmm4, ebx - - mov bl, [rsi+rax] - movd xmm5, ebx -%ifidn %1, sse2 - punpcklbw xmm4, xmm0 - punpcklbw xmm5, xmm0 - pshuflw xmm4, xmm4, 0x0 - pshuflw xmm5, xmm5, 0x0 - punpcklqdq xmm4, xmm4 - punpcklqdq xmm5, xmm5 -%else - pshufb xmm4, xmm3 - pshufb xmm5, xmm3 -%endif - movdqa xmm6, xmm4 - movdqa xmm7, xmm5 - paddw xmm4, xmm1 - paddw xmm6, xmm2 - paddw xmm5, xmm1 - paddw xmm7, xmm2 - packuswb xmm4, xmm6 - packuswb xmm5, xmm7 - movdqa [rdi ], xmm4 - movdqa [rdi+rcx], xmm5 - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rcx*2] - dec edx - jnz vp8_intra_pred_y_tm_%1_loop - - ; begin epilog - RESTORE_GOT - pop rbx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -%endmacro - -vp8_intra_pred_y_tm sse2 -vp8_intra_pred_y_tm ssse3 - -;void vp8_intra_pred_y_ve_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride -; ) -global sym(vp8_intra_pred_y_ve_sse2) PRIVATE -sym(vp8_intra_pred_y_ve_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - ; end prolog - - ;arg(3), arg(4) not used - - mov rax, arg(2) ;above; - mov rsi, 2 - movsxd rdx, dword ptr arg(1) ;dst_stride - - ; read from top - movdqa xmm1, [rax] - - ; write out - mov rax, arg(0) ;dst; - lea rcx, [rdx*3] - -.label - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - movdqa [rax ], xmm1 - movdqa [rax+rdx ], xmm1 - movdqa [rax+rdx*2], xmm1 - movdqa [rax+rcx ], xmm1 - lea rax, [rax+rdx*4] - dec rsi - jnz .label - - ; begin epilog - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_intra_pred_y_ho_sse2( -; unsigned char *dst, -; int dst_stride -; unsigned char *above, -; unsigned char *left, -; int left_stride, -; ) -global sym(vp8_intra_pred_y_ho_sse2) PRIVATE -sym(vp8_intra_pred_y_ho_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - ;arg(2) not used - - ; read from left and write out - mov edx, 8 - mov rsi, arg(3) ;left; - movsxd rax, dword ptr arg(4) ;left_stride; - mov rdi, arg(0) ;dst; - movsxd rcx, dword ptr arg(1) ;dst_stride - -vp8_intra_pred_y_ho_sse2_loop: - mov bl, [rsi] - movd xmm0, ebx - mov bl, [rsi+rax] - movd xmm1, ebx - - ; FIXME use pshufb for ssse3 version - punpcklbw xmm0, xmm0 - punpcklbw xmm1, xmm1 - pshuflw xmm0, xmm0, 0x0 - pshuflw xmm1, xmm1, 0x0 - punpcklqdq xmm0, xmm0 - punpcklqdq xmm1, xmm1 - movdqa [rdi ], xmm0 - movdqa [rdi+rcx], xmm1 - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rcx*2] - dec edx - jnz vp8_intra_pred_y_ho_sse2_loop - - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - SECTION_RODATA align 16 dc_128: - times 16 db 128 + times 8 db 128 dc_4: times 4 dw 4 align 16 -dc_8: - times 8 dw 8 -align 16 dc_1024: times 8 dw 0x400 align 16 diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c index 65f4251a9..19862a91c 100644 --- a/vp8/common/x86/recon_wrapper_sse2.c +++ b/vp8/common/x86/recon_wrapper_sse2.c @@ -109,78 +109,3 @@ void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x, vp8_intra_pred_uv_tm_ssse3, vp8_intra_pred_uv_ho_ssse3); } - -#define build_intra_predictors_mby_prototype(sym) \ - void sym(unsigned char *dst, int dst_stride, \ - const unsigned char *above, \ - const unsigned char *left, int left_stride) -typedef build_intra_predictors_mby_prototype((*build_intra_predictors_mby_fn_t)); - -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dctop_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dcleft_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc128_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ho_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ve_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_sse2); -extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_ssse3); - -static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char *dst_y, - int dst_stride, - unsigned char * yleft, - int left_stride, - build_intra_predictors_mby_fn_t tm_func) -{ - int mode = x->mode_info_context->mbmi.mode; - build_intra_predictors_mbuv_fn_t fn; - - switch (mode) { - case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break; - case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break; - case TM_PRED: fn = tm_func; break; - case DC_PRED: - if (x->up_available) { - if (x->left_available) { - fn = vp8_intra_pred_y_dc_sse2; break; - } else { - fn = vp8_intra_pred_y_dctop_sse2; break; - } - } else if (x->left_available) { - fn = vp8_intra_pred_y_dcleft_sse2; break; - } else { - fn = vp8_intra_pred_y_dc128_sse2; break; - } - break; - default: return; - } - - fn(dst_y, dst_stride, yabove_row, yleft, left_stride); - return; -} - -void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) -{ - vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr, - y_stride, yleft, left_stride, - vp8_intra_pred_y_tm_sse2); -} - -void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) -{ - vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr, - y_stride, yleft, left_stride, - vp8_intra_pred_y_tm_ssse3); - -} diff --git a/vp8/decoder/decodeframe.c b/vp8/decoder/decodeframe.c index 8be8c1649..f0d760373 100644 --- a/vp8/decoder/decodeframe.c +++ b/vp8/decoder/decodeframe.c @@ -23,6 +23,7 @@ #include "vp8/common/entropymode.h" #include "vp8/common/quant_common.h" #include "vpx_scale/vpx_scale.h" +#include "vp8/common/reconintra.h" #include "vp8/common/setupintrarecon.h" #include "decodemv.h" diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 9015fcbb4..3468268a2 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -25,9 +25,12 @@ #include #include "vp8/common/quant_common.h" +#include "vp8/common/reconintra.h" +#include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/systemdependent.h" +#include "vpx_ports/vpx_once.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" #if CONFIG_ERROR_CONCEALMENT @@ -42,6 +45,17 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); static int get_free_fb (VP8_COMMON *cm); static void ref_cnt_fb (int *buf, int *idx, int new_idx); +static void initialize_dec(void) { + static volatile int init_done = 0; + + if (!init_done) + { + vpx_dsp_rtcd(); + vp8_init_intra_predictors(); + init_done = 1; + } +} + static void remove_decompressor(VP8D_COMP *pbi) { #if CONFIG_ERROR_CONCEALMENT @@ -105,6 +119,8 @@ static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf) vp8_setup_block_dptrs(&pbi->mb); + once(initialize_dec); + return pbi; } diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 6801532f1..7c7184c78 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -24,6 +24,7 @@ #include "detokenize.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" #include "vp8/common/setupintrarecon.h" #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 938cc7ecb..44be959c9 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -13,6 +13,7 @@ #include "vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vp8/encoder/quantize.h" +#include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "encodemb.h" #include "vp8/common/invtrans.h" diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index f1b932632..df5bcf688 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -31,6 +31,7 @@ #include "vp8/common/postproc.h" #endif #include "vpx_mem/vpx_mem.h" +#include "vp8/common/reconintra.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" @@ -422,6 +423,16 @@ static void setup_features(VP8_COMP *cpi) static void dealloc_raw_frame_buffers(VP8_COMP *cpi); +void vp8_initialize_enc(void) +{ + static volatile int init_done = 0; + + if (!init_done) { + vpx_dsp_rtcd(); + vp8_init_intra_predictors(); + init_done = 1; + } +} static void dealloc_compressor_data(VP8_COMP *cpi) { diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 8beba27fe..317e4b9e4 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -716,6 +716,8 @@ typedef struct VP8_COMP } rd_costs; } VP8_COMP; +void vp8_initialize_enc(void); + void vp8_alloc_compressor_data(VP8_COMP *cpi); int vp8_reverse_trans(int x); void vp8_new_framerate(VP8_COMP *cpi, double framerate); diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index b50838550..d0fff3f04 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -21,6 +21,7 @@ #include "vp8/common/findnearmv.h" #include "encodemb.h" #include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "vpx_dsp/variance.h" #include "mcomp.h" diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index fdff378bc..ab0ad1599 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -24,6 +24,7 @@ #include "pickinter.h" #include "vp8/common/entropymode.h" #include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/findnearmv.h" #include "vp8/common/quant_common.h" diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index 3ad11c77f..c37636fb4 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -45,6 +45,7 @@ VP8_COMMON_SRCS-yes += common/mv.h VP8_COMMON_SRCS-yes += common/onyxc_int.h VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/reconinter.h +VP8_COMMON_SRCS-yes += common/reconintra.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c VP8_COMMON_SRCS-yes += common/rtcd_defs.pl diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 74eccd7a3..80ea6b432 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -17,6 +17,7 @@ #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_once.h" #include "vp8/encoder/onyx_int.h" #include "vpx/vp8cx.h" #include "vp8/encoder/firstpass.h" @@ -693,6 +694,8 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, else ctx->priv->enc.total_encoders = 1; + once(vp8_initialize_enc); + res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); if (!res) diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 98a3d3401..31d8c75a1 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -36,7 +36,6 @@ DSP_SRCS-yes += bitreader_buffer.h endif # intra predictions -ifneq ($(filter yes,$(CONFIG_VP9) $(CONFIG_VP10)),) DSP_SRCS-yes += intrapred.c ifeq ($(CONFIG_USE_X86INC),yes) @@ -59,7 +58,6 @@ DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred4_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred8_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c -endif # CONFIG_VP9 || CONFIG_VP10 DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 128dd8ba8..be7d5c2c6 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -54,322 +54,320 @@ if ($opts{arch} eq "x86_64") { # Intra prediction # -if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) { - add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc"; - add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc"; +add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc"; - add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc"; - add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_4x4/; +add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_4x4/; - add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_4x4 neon/; +add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_4x4 neon/; - add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc"; +add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc"; - add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; +add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc"; - add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; +add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc"; - add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc"; - add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc"; +add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc"; - add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc"; - add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_8x8/; +add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_8x8/; - add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_8x8/; +add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_8x8/; - add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc"; +add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc"; - add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; +add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; +add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc"; - add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc"; - add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc"; +add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc"; - add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc"; - add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_16x16/; +add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_16x16/; - add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_16x16/; +add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_16x16/; - add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc"; +add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc"; - add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; +add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc"; - add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d117_predictor_32x32/; +add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_32x32/; - add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d135_predictor_32x32/; +add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d135_predictor_32x32/; - add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc"; +add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc"; - add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; +add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; - add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; +add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; - add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc"; - add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; - specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; +add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc"; # High bitdepth functions - if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_4x4/; +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_4x4/; - add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_4x4/; + add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_4x4/; - add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_4x4/; + add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_4x4/; - add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_4x4/; + add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_4x4/; - add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_4x4/; + add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_4x4/; - add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_4x4/; + add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_4x4/; - add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_4x4/; + add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_4x4/; - add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc"; + add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc"; - add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc"; + add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc"; - add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc"; + add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc"; - add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_4x4/; + add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_4x4/; - add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_4x4/; + add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_4x4/; - add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_4x4/; + add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_4x4/; - add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_8x8/; + add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_8x8/; - add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_8x8/; + add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_8x8/; - add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_8x8/; + add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_8x8/; - add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_8x8/; + add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_8x8/; - add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_8x8/; + add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_8x8/; - add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_8x8/; + add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_8x8/; - add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_8x8/; + add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_8x8/; - add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";; + add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";; - add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_8x8/; + add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_8x8/; - add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_8x8/; + add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_8x8/; - add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_8x8/; + add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_8x8/; - add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_16x16/; + add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_16x16/; - add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_16x16/; + add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_16x16/; - add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_16x16/; + add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_16x16/; - add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_16x16/; + add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_16x16/; - add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_16x16/; + add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_16x16/; - add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_16x16/; + add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_16x16/; - add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_16x16/; + add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_16x16/; - add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; + add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc"; - add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_16x16/; - add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_16x16/; + add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_16x16/; - add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_32x32/; + add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d207_predictor_32x32/; - add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d45_predictor_32x32/; + add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d45_predictor_32x32/; - add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_32x32/; + add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d63_predictor_32x32/; - add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_h_predictor_32x32/; + add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_h_predictor_32x32/; - add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_32x32/; + add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d117_predictor_32x32/; - add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d135_predictor_32x32/; + add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d135_predictor_32x32/; - add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_32x32/; + add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_d153_predictor_32x32/; - add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc"; + add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc"; - add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; + add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc"; - add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; + add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc"; - add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_top_predictor_32x32/; + add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_top_predictor_32x32/; - add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_left_predictor_32x32/; + add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_left_predictor_32x32/; - add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_dc_128_predictor_32x32/; - } # CONFIG_VP9_HIGHBITDEPTH -} # CONFIG_VP9 || CONFIG_VP10 + add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; + specialize qw/vpx_highbd_dc_128_predictor_32x32/; +} # CONFIG_VP9_HIGHBITDEPTH # # Sub Pixel Filters