Merge "Remove get_filter_base() and get_filter_offset() in convolve"
This commit is contained in:
commit
7219f31904
@ -33,9 +33,9 @@ static const unsigned int kMaxDimension = 64;
|
|||||||
|
|
||||||
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
|
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int w, int h);
|
int h);
|
||||||
|
|
||||||
typedef void (*WrapperFilterBlock2d8Func)(
|
typedef void (*WrapperFilterBlock2d8Func)(
|
||||||
const uint8_t *src_ptr, const unsigned int src_stride,
|
const uint8_t *src_ptr, const unsigned int src_stride,
|
||||||
@ -550,7 +550,7 @@ TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
|
|||||||
|
|
||||||
vpx_usec_timer_start(&timer);
|
vpx_usec_timer_start(&timer);
|
||||||
for (int n = 0; n < kNumTests; ++n) {
|
for (int n = 0; n < kNumTests; ++n) {
|
||||||
UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
|
UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
|
||||||
width, height);
|
width, height);
|
||||||
}
|
}
|
||||||
vpx_usec_timer_mark(&timer);
|
vpx_usec_timer_mark(&timer);
|
||||||
@ -570,7 +570,7 @@ TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
|
|||||||
|
|
||||||
vpx_usec_timer_start(&timer);
|
vpx_usec_timer_start(&timer);
|
||||||
for (int n = 0; n < kNumTests; ++n) {
|
for (int n = 0; n < kNumTests; ++n) {
|
||||||
UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
|
UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
|
||||||
width, height);
|
width, height);
|
||||||
}
|
}
|
||||||
vpx_usec_timer_mark(&timer);
|
vpx_usec_timer_mark(&timer);
|
||||||
@ -585,7 +585,7 @@ TEST_P(ConvolveTest, Copy) {
|
|||||||
uint8_t *const out = output();
|
uint8_t *const out = output();
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
|
ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
|
||||||
NULL, 0, NULL, 0, Width(), Height()));
|
NULL, 0, 0, 0, 0, Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -604,7 +604,7 @@ TEST_P(ConvolveTest, Avg) {
|
|||||||
CopyOutputToRef();
|
CopyOutputToRef();
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
|
ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
|
||||||
NULL, 0, NULL, 0, Width(), Height()));
|
NULL, 0, 0, 0, 0, Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -621,12 +621,10 @@ TEST_P(ConvolveTest, Avg) {
|
|||||||
TEST_P(ConvolveTest, CopyHoriz) {
|
TEST_P(ConvolveTest, CopyHoriz) {
|
||||||
uint8_t *const in = input();
|
uint8_t *const in = input();
|
||||||
uint8_t *const out = output();
|
uint8_t *const out = output();
|
||||||
DECLARE_ALIGNED(256, const int16_t,
|
|
||||||
filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
|
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
|
ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
|
||||||
filter8, 16, filter8, 16, Width(),
|
vp9_filter_kernels[0], 0, 16, 0, 16,
|
||||||
Height()));
|
Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -641,12 +639,10 @@ TEST_P(ConvolveTest, CopyHoriz) {
|
|||||||
TEST_P(ConvolveTest, CopyVert) {
|
TEST_P(ConvolveTest, CopyVert) {
|
||||||
uint8_t *const in = input();
|
uint8_t *const in = input();
|
||||||
uint8_t *const out = output();
|
uint8_t *const out = output();
|
||||||
DECLARE_ALIGNED(256, const int16_t,
|
|
||||||
filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
|
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
|
ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
|
||||||
filter8, 16, filter8, 16, Width(),
|
vp9_filter_kernels[0], 0, 16, 0, 16,
|
||||||
Height()));
|
Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -661,12 +657,10 @@ TEST_P(ConvolveTest, CopyVert) {
|
|||||||
TEST_P(ConvolveTest, Copy2D) {
|
TEST_P(ConvolveTest, Copy2D) {
|
||||||
uint8_t *const in = input();
|
uint8_t *const in = input();
|
||||||
uint8_t *const out = output();
|
uint8_t *const out = output();
|
||||||
DECLARE_ALIGNED(256, const int16_t,
|
|
||||||
filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
|
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
|
ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
|
||||||
filter8, 16, filter8, 16, Width(),
|
vp9_filter_kernels[0], 0, 16, 0, 16,
|
||||||
Height()));
|
Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -702,7 +696,6 @@ TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int16_t kInvalidFilter[8] = { 0 };
|
|
||||||
const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
|
const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
|
||||||
wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
|
wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
|
||||||
};
|
};
|
||||||
@ -755,21 +748,21 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
|
|||||||
Width(), Height(), UUT_->use_highbd_);
|
Width(), Height(), UUT_->use_highbd_);
|
||||||
|
|
||||||
if (filter_x && filter_y)
|
if (filter_x && filter_y)
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i](
|
ASM_REGISTER_STATE_CHECK(
|
||||||
in, kInputStride, out, kOutputStride, filters[filter_x], 16,
|
UUT_->hv8_[i](in, kInputStride, out, kOutputStride, filters,
|
||||||
filters[filter_y], 16, Width(), Height()));
|
filter_x, 16, filter_y, 16, Width(), Height()));
|
||||||
else if (filter_y)
|
else if (filter_y)
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->v8_[i](
|
ASM_REGISTER_STATE_CHECK(
|
||||||
in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
|
UUT_->v8_[i](in, kInputStride, out, kOutputStride, filters, 0,
|
||||||
filters[filter_y], 16, Width(), Height()));
|
16, filter_y, 16, Width(), Height()));
|
||||||
else if (filter_x)
|
else if (filter_x)
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->h8_[i](
|
ASM_REGISTER_STATE_CHECK(
|
||||||
in, kInputStride, out, kOutputStride, filters[filter_x], 16,
|
UUT_->h8_[i](in, kInputStride, out, kOutputStride, filters,
|
||||||
kInvalidFilter, 16, Width(), Height()));
|
filter_x, 16, 0, 16, Width(), Height()));
|
||||||
else
|
else
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](
|
ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](in, kInputStride, out,
|
||||||
in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
|
kOutputStride, NULL, 0, 0,
|
||||||
kInvalidFilter, 0, Width(), Height()));
|
0, 0, Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -853,21 +846,21 @@ TEST_P(ConvolveTest, FilterExtremes) {
|
|||||||
filters[filter_y], ref, kOutputStride,
|
filters[filter_y], ref, kOutputStride,
|
||||||
Width(), Height(), UUT_->use_highbd_);
|
Width(), Height(), UUT_->use_highbd_);
|
||||||
if (filter_x && filter_y)
|
if (filter_x && filter_y)
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
|
ASM_REGISTER_STATE_CHECK(
|
||||||
in, kInputStride, out, kOutputStride, filters[filter_x], 16,
|
UUT_->hv8_[0](in, kInputStride, out, kOutputStride, filters,
|
||||||
filters[filter_y], 16, Width(), Height()));
|
filter_x, 16, filter_y, 16, Width(), Height()));
|
||||||
else if (filter_y)
|
else if (filter_y)
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
|
ASM_REGISTER_STATE_CHECK(
|
||||||
in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
|
UUT_->v8_[0](in, kInputStride, out, kOutputStride, filters, 0,
|
||||||
filters[filter_y], 16, Width(), Height()));
|
16, filter_y, 16, Width(), Height()));
|
||||||
else if (filter_x)
|
else if (filter_x)
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
|
ASM_REGISTER_STATE_CHECK(
|
||||||
in, kInputStride, out, kOutputStride, filters[filter_x], 16,
|
UUT_->h8_[0](in, kInputStride, out, kOutputStride, filters,
|
||||||
kInvalidFilter, 16, Width(), Height()));
|
filter_x, 16, 0, 16, Width(), Height()));
|
||||||
else
|
else
|
||||||
ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
|
ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out,
|
||||||
in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
|
kOutputStride, NULL, 0, 0,
|
||||||
kInvalidFilter, 0, Width(), Height()));
|
0, 0, Width(), Height()));
|
||||||
|
|
||||||
for (int y = 0; y < Height(); ++y) {
|
for (int y = 0; y < Height(); ++y) {
|
||||||
for (int x = 0; x < Width(); ++x)
|
for (int x = 0; x < Width(); ++x)
|
||||||
@ -897,8 +890,8 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
|
|||||||
for (int step = 1; step <= 32; ++step) {
|
for (int step = 1; step <= 32; ++step) {
|
||||||
/* Test the horizontal and vertical filters in combination. */
|
/* Test the horizontal and vertical filters in combination. */
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac],
|
UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap, frac,
|
||||||
step, eighttap[frac], step, Width(), Height()));
|
step, frac, step, Width(), Height()));
|
||||||
|
|
||||||
CheckGuardBlocks();
|
CheckGuardBlocks();
|
||||||
|
|
||||||
@ -917,14 +910,14 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
|
|||||||
using std::tr1::make_tuple;
|
using std::tr1::make_tuple;
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
#define WRAP(func, bd) \
|
#define WRAP(func, bd) \
|
||||||
void wrap_##func##_##bd( \
|
void wrap_##func##_##bd( \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \
|
ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
|
||||||
const int16_t *filter_y, int filter_y_stride, int w, int h) { \
|
int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \
|
||||||
vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride, \
|
vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride, \
|
||||||
reinterpret_cast<uint16_t *>(dst), dst_stride, filter_x, \
|
reinterpret_cast<uint16_t *>(dst), dst_stride, filter, \
|
||||||
filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_SSE2 && ARCH_X86_64
|
#if HAVE_SSE2 && ARCH_X86_64
|
||||||
|
@ -26,9 +26,9 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
|
|||||||
const struct scale_factors *sf, int w, int h,
|
const struct scale_factors *sf, int w, int h,
|
||||||
int ref, const InterpKernel *kernel, int xs,
|
int ref, const InterpKernel *kernel, int xs,
|
||||||
int ys) {
|
int ys) {
|
||||||
sf->predict[subpel_x != 0][subpel_y != 0][ref](
|
sf->predict[subpel_x != 0][subpel_y != 0][ref](src, src_stride, dst,
|
||||||
src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
|
dst_stride, kernel, subpel_x,
|
||||||
ys, w, h);
|
xs, subpel_y, ys, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
@ -37,8 +37,8 @@ static INLINE void highbd_inter_predictor(
|
|||||||
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
|
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
|
||||||
int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
|
int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
|
||||||
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
|
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
|
||||||
src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
|
src, src_stride, dst, dst_stride, kernel, subpel_x, xs, subpel_y, ys, w,
|
||||||
ys, w, h, bd);
|
h, bd);
|
||||||
}
|
}
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
@ -390,12 +390,12 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (decision == FILTER_BLOCK) {
|
if (decision == FILTER_BLOCK) {
|
||||||
vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0,
|
vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0, 0,
|
||||||
NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
|
0, 0, num_4x4_blocks_wide_lookup[bs] << 2,
|
||||||
num_4x4_blocks_high_lookup[bs] << 2);
|
num_4x4_blocks_high_lookup[bs] << 2);
|
||||||
} else { // COPY_BLOCK
|
} else { // COPY_BLOCK
|
||||||
vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0,
|
vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0, 0,
|
||||||
NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
|
0, 0, num_4x4_blocks_wide_lookup[bs] << 2,
|
||||||
num_4x4_blocks_high_lookup[bs] << 2);
|
num_4x4_blocks_high_lookup[bs] << 2);
|
||||||
}
|
}
|
||||||
*denoiser_decision = decision;
|
*denoiser_decision = decision;
|
||||||
|
@ -2645,15 +2645,14 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
|
|||||||
|
|
||||||
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
|
vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
|
||||||
CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
|
CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
|
||||||
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
|
x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
|
||||||
kernel[y_q4 & 0xf], 16 * src_h / dst_h,
|
16 * src_h / dst_h, 16 / factor, 16 / factor,
|
||||||
16 / factor, 16 / factor, bd);
|
bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
|
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
|
||||||
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
|
x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
|
||||||
kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor,
|
16 * src_h / dst_h, 16 / factor, 16 / factor);
|
||||||
16 / factor);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,10 +43,9 @@ void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
|
|||||||
(x / factor) * src_w / dst_w;
|
(x / factor) * src_w / dst_w;
|
||||||
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
|
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
|
||||||
|
|
||||||
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
|
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
|
||||||
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
|
x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
|
||||||
kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor,
|
16 * src_h / dst_h, 16 / factor, 16 / factor);
|
||||||
16 / factor);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2162,15 +2162,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
|||||||
vpx_highbd_convolve_copy(
|
vpx_highbd_convolve_copy(
|
||||||
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
|
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
|
||||||
CONVERT_TO_SHORTPTR(this_mode_pred->data), this_mode_pred->stride,
|
CONVERT_TO_SHORTPTR(this_mode_pred->data), this_mode_pred->stride,
|
||||||
NULL, 0, NULL, 0, bw, bh, xd->bd);
|
NULL, 0, 0, 0, 0, bw, bh, xd->bd);
|
||||||
else
|
else
|
||||||
vpx_convolve_copy(best_pred->data, best_pred->stride,
|
vpx_convolve_copy(best_pred->data, best_pred->stride,
|
||||||
this_mode_pred->data, this_mode_pred->stride, NULL,
|
this_mode_pred->data, this_mode_pred->stride, NULL,
|
||||||
0, NULL, 0, bw, bh);
|
0, 0, 0, 0, bw, bh);
|
||||||
#else
|
#else
|
||||||
vpx_convolve_copy(best_pred->data, best_pred->stride,
|
vpx_convolve_copy(best_pred->data, best_pred->stride,
|
||||||
this_mode_pred->data, this_mode_pred->stride, NULL, 0,
|
this_mode_pred->data, this_mode_pred->stride, NULL, 0,
|
||||||
NULL, 0, bw, bh);
|
0, 0, 0, bw, bh);
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
best_pred = this_mode_pred;
|
best_pred = this_mode_pred;
|
||||||
}
|
}
|
||||||
@ -2264,14 +2264,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
|||||||
if (cm->use_highbitdepth)
|
if (cm->use_highbitdepth)
|
||||||
vpx_highbd_convolve_copy(
|
vpx_highbd_convolve_copy(
|
||||||
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
|
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
|
||||||
CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, NULL, 0,
|
CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, 0, 0, 0,
|
||||||
bw, bh, xd->bd);
|
bw, bh, xd->bd);
|
||||||
else
|
else
|
||||||
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
|
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
|
||||||
pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
|
pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
|
||||||
#else
|
#else
|
||||||
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
|
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
|
||||||
pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
|
pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -600,7 +600,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
|
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
|
||||||
32, NULL, 0, NULL, 0, bs, bs, xd->bd);
|
32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
|
||||||
if (xd->lossless) {
|
if (xd->lossless) {
|
||||||
vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
|
vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
|
||||||
} else {
|
} else {
|
||||||
@ -623,7 +623,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
|
|||||||
recon = CONVERT_TO_BYTEPTR(recon16);
|
recon = CONVERT_TO_BYTEPTR(recon16);
|
||||||
} else {
|
} else {
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
|
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
|
||||||
switch (tx_size) {
|
switch (tx_size) {
|
||||||
case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
|
case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
|
||||||
case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
|
case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
|
||||||
|
@ -137,15 +137,14 @@ static INLINE uint16x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1,
|
|||||||
|
|
||||||
void vpx_highbd_convolve8_horiz_neon(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_horiz_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, // unused
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int y_step_q4, // unused
|
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (x_step_q4 != 16) {
|
if (x_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);
|
||||||
} else {
|
} else {
|
||||||
const int16x8_t filters = vld1q_s16(filter_x);
|
const int16x8_t filters = vld1q_s16(filter[x0_q4]);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
uint16x8_t t0, t1, t2, t3;
|
uint16x8_t t0, t1, t2, t3;
|
||||||
|
|
||||||
@ -337,15 +336,15 @@ void vpx_highbd_convolve8_horiz_neon(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
void vpx_highbd_convolve8_avg_horiz_neon(const uint16_t *src,
|
void vpx_highbd_convolve8_avg_horiz_neon(const uint16_t *src,
|
||||||
ptrdiff_t src_stride, uint16_t *dst,
|
ptrdiff_t src_stride, uint16_t *dst,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, // unused
|
int x_step_q4, int y0_q4,
|
||||||
int y_step_q4, // unused
|
int y_step_q4, int w, int h, int bd) {
|
||||||
int w, int h, int bd) {
|
|
||||||
if (x_step_q4 != 16) {
|
if (x_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,
|
||||||
|
bd);
|
||||||
} else {
|
} else {
|
||||||
const int16x8_t filters = vld1q_s16(filter_x);
|
const int16x8_t filters = vld1q_s16(filter[x0_q4]);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
uint16x8_t t0, t1, t2, t3;
|
uint16x8_t t0, t1, t2, t3;
|
||||||
|
|
||||||
@ -566,15 +565,14 @@ void vpx_highbd_convolve8_avg_horiz_neon(const uint16_t *src,
|
|||||||
|
|
||||||
void vpx_highbd_convolve8_vert_neon(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_vert_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, // unused
|
const InterpKernel *filter, int x0_q4,
|
||||||
int x_step_q4, // unused
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (y_step_q4 != 16) {
|
if (y_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x_step_q4, y0_q4, y_step_q4, w, h, bd);
|
||||||
} else {
|
} else {
|
||||||
const int16x8_t filters = vld1q_s16(filter_y);
|
const int16x8_t filters = vld1q_s16(filter[y0_q4]);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
|
|
||||||
assert(!((intptr_t)dst & 3));
|
assert(!((intptr_t)dst & 3));
|
||||||
@ -732,15 +730,15 @@ void vpx_highbd_convolve8_vert_neon(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
void vpx_highbd_convolve8_avg_vert_neon(const uint16_t *src,
|
void vpx_highbd_convolve8_avg_vert_neon(const uint16_t *src,
|
||||||
ptrdiff_t src_stride, uint16_t *dst,
|
ptrdiff_t src_stride, uint16_t *dst,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, // unused
|
const InterpKernel *filter, int x0_q4,
|
||||||
int x_step_q4, // unused
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (y_step_q4 != 16) {
|
if (y_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,
|
||||||
|
bd);
|
||||||
} else {
|
} else {
|
||||||
const int16x8_t filters = vld1q_s16(filter_y);
|
const int16x8_t filters = vld1q_s16(filter[y0_q4]);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
|
|
||||||
assert(!((intptr_t)dst & 3));
|
assert(!((intptr_t)dst & 3));
|
||||||
|
@ -15,13 +15,14 @@
|
|||||||
|
|
||||||
void vpx_highbd_convolve_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
if (w < 8) { // avg4
|
if (w < 8) { // avg4
|
||||||
|
@ -15,13 +15,14 @@
|
|||||||
|
|
||||||
void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
if (w < 8) { // copy4
|
if (w < 8) { // copy4
|
||||||
|
@ -15,10 +15,9 @@
|
|||||||
|
|
||||||
void vpx_highbd_convolve8_neon(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
|
|
||||||
// + 1 to make it divisible by 4
|
// + 1 to make it divisible by 4
|
||||||
uint16_t temp[64 * 136];
|
uint16_t temp[64 * 136];
|
||||||
const int intermediate_height =
|
const int intermediate_height =
|
||||||
@ -29,20 +28,19 @@ void vpx_highbd_convolve8_neon(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
* buffer which has lots of extra room and is subsequently discarded this is
|
* buffer which has lots of extra room and is subsequently discarded this is
|
||||||
* safe if somewhat less than ideal. */
|
* safe if somewhat less than ideal. */
|
||||||
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
|
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w,
|
||||||
intermediate_height, bd);
|
intermediate_height, bd);
|
||||||
|
|
||||||
/* Step into the temp buffer 3 lines to get the actual frame data */
|
/* Step into the temp buffer 3 lines to get the actual frame data */
|
||||||
vpx_highbd_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
|
vpx_highbd_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
|
|
||||||
// + 1 to make it divisible by 4
|
// + 1 to make it divisible by 4
|
||||||
uint16_t temp[64 * 136];
|
uint16_t temp[64 * 136];
|
||||||
const int intermediate_height =
|
const int intermediate_height =
|
||||||
@ -52,8 +50,9 @@ void vpx_highbd_convolve8_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
* to average the values after both passes.
|
* to average the values after both passes.
|
||||||
*/
|
*/
|
||||||
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
|
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w,
|
||||||
intermediate_height, bd);
|
intermediate_height, bd);
|
||||||
vpx_highbd_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
|
vpx_highbd_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,
|
||||||
|
bd);
|
||||||
}
|
}
|
||||||
|
@ -42,10 +42,11 @@
|
|||||||
; r1 int src_stride
|
; r1 int src_stride
|
||||||
; r2 uint8_t *dst
|
; r2 uint8_t *dst
|
||||||
; r3 int dst_stride
|
; r3 int dst_stride
|
||||||
; sp[]const int16_t *filter_x
|
; sp[]const int16_t *filter
|
||||||
; sp[]int x_step_q4
|
; sp[]int x0_q4
|
||||||
; sp[]const int16_t *filter_y ; unused
|
; sp[]int x_step_q4 ; unused
|
||||||
; sp[]int y_step_q4 ; unused
|
; sp[]int y0_q4
|
||||||
|
; sp[]int y_step_q4 ; unused
|
||||||
; sp[]int w
|
; sp[]int w
|
||||||
; sp[]int h
|
; sp[]int h
|
||||||
|
|
||||||
@ -54,11 +55,11 @@
|
|||||||
|
|
||||||
sub r0, r0, #3 ; adjust for taps
|
sub r0, r0, #3 ; adjust for taps
|
||||||
|
|
||||||
ldr r5, [sp, #32] ; filter_x
|
ldrd r4, r5, [sp, #32] ; filter, x0_q4
|
||||||
ldr r6, [sp, #48] ; w
|
add r4, r5, lsl #4
|
||||||
ldr r7, [sp, #52] ; h
|
ldrd r6, r7, [sp, #52] ; w, h
|
||||||
|
|
||||||
vld1.s16 {q0}, [r5] ; filter_x
|
vld1.s16 {q0}, [r4] ; filter
|
||||||
|
|
||||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||||
@ -127,7 +128,7 @@ vpx_convolve8_avg_loop_horiz
|
|||||||
|
|
||||||
sub r2, r2, r3, lsl #2 ; reset for store
|
sub r2, r2, r3, lsl #2 ; reset for store
|
||||||
|
|
||||||
; src[] * filter_x
|
; src[] * filter
|
||||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||||
@ -184,11 +185,13 @@ vpx_convolve8_avg_loop_horiz
|
|||||||
sub r0, r0, r1
|
sub r0, r0, r1
|
||||||
sub r0, r0, r1, lsl #1
|
sub r0, r0, r1, lsl #1
|
||||||
|
|
||||||
ldr r4, [sp, #32] ; filter_y
|
ldr r4, [sp, #24] ; filter
|
||||||
ldr r6, [sp, #40] ; w
|
ldr r5, [sp, #36] ; y0_q4
|
||||||
ldr lr, [sp, #44] ; h
|
add r4, r5, lsl #4
|
||||||
|
ldr r6, [sp, #44] ; w
|
||||||
|
ldr lr, [sp, #48] ; h
|
||||||
|
|
||||||
vld1.s16 {q0}, [r4] ; filter_y
|
vld1.s16 {q0}, [r4] ; filter
|
||||||
|
|
||||||
lsl r1, r1, #1
|
lsl r1, r1, #1
|
||||||
lsl r3, r3, #1
|
lsl r3, r3, #1
|
||||||
@ -232,7 +235,7 @@ vpx_convolve8_avg_loop_vert
|
|||||||
pld [r7]
|
pld [r7]
|
||||||
pld [r4]
|
pld [r4]
|
||||||
|
|
||||||
; src[] * filter_y
|
; src[] * filter
|
||||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||||
|
|
||||||
pld [r7, r1]
|
pld [r7, r1]
|
||||||
|
@ -125,11 +125,10 @@ static INLINE int16x8_t convolve8_8(int16x8_t s0, int16x8_t s1, int16x8_t s2,
|
|||||||
|
|
||||||
void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, // unused
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int y_step_q4, // unused
|
int h) {
|
||||||
int w, int h) {
|
const int16x8_t filters = vld1q_s16(filter[x0_q4]);
|
||||||
const int16x8_t filters = vld1q_s16(filter_x);
|
|
||||||
uint8x8_t t0, t1, t2, t3;
|
uint8x8_t t0, t1, t2, t3;
|
||||||
|
|
||||||
assert(!((intptr_t)dst & 3));
|
assert(!((intptr_t)dst & 3));
|
||||||
@ -137,8 +136,8 @@ void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
(void)y0_q4;
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
(void)filter_y;
|
|
||||||
|
|
||||||
src -= 3;
|
src -= 3;
|
||||||
|
|
||||||
@ -390,11 +389,10 @@ void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, // unused
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int y_step_q4, // unused
|
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
const int16x8_t filters = vld1q_s16(filter_x);
|
const int16x8_t filters = vld1q_s16(filter[x0_q4]);
|
||||||
uint8x8_t t0, t1, t2, t3;
|
uint8x8_t t0, t1, t2, t3;
|
||||||
|
|
||||||
assert(!((intptr_t)dst & 3));
|
assert(!((intptr_t)dst & 3));
|
||||||
@ -402,8 +400,8 @@ void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
(void)y0_q4;
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
(void)filter_y;
|
|
||||||
|
|
||||||
src -= 3;
|
src -= 3;
|
||||||
|
|
||||||
@ -692,19 +690,18 @@ void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, // unused
|
const InterpKernel *filter, int x0_q4,
|
||||||
int x_step_q4, // unused
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
|
||||||
int h) {
|
int h) {
|
||||||
const int16x8_t filters = vld1q_s16(filter_y);
|
const int16x8_t filters = vld1q_s16(filter[y0_q4]);
|
||||||
|
|
||||||
assert(!((intptr_t)dst & 3));
|
assert(!((intptr_t)dst & 3));
|
||||||
assert(!(dst_stride & 3));
|
assert(!(dst_stride & 3));
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
|
|
||||||
|
(void)x0_q4;
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
(void)filter_x;
|
|
||||||
|
|
||||||
src -= 3 * src_stride;
|
src -= 3 * src_stride;
|
||||||
|
|
||||||
@ -864,19 +861,18 @@ void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, // unused
|
const InterpKernel *filter, int x0_q4,
|
||||||
int x_step_q4, // unused
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
|
||||||
int h) {
|
int h) {
|
||||||
const int16x8_t filters = vld1q_s16(filter_y);
|
const int16x8_t filters = vld1q_s16(filter[y0_q4]);
|
||||||
|
|
||||||
assert(!((intptr_t)dst & 3));
|
assert(!((intptr_t)dst & 3));
|
||||||
assert(!(dst_stride & 3));
|
assert(!(dst_stride & 3));
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
|
|
||||||
|
(void)x0_q4;
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
(void)filter_x;
|
|
||||||
|
|
||||||
src -= 3 * src_stride;
|
src -= 3 * src_stride;
|
||||||
|
|
||||||
|
@ -42,10 +42,11 @@
|
|||||||
; r1 int src_stride
|
; r1 int src_stride
|
||||||
; r2 uint8_t *dst
|
; r2 uint8_t *dst
|
||||||
; r3 int dst_stride
|
; r3 int dst_stride
|
||||||
; sp[]const int16_t *filter_x
|
; sp[]const int16_t *filter
|
||||||
; sp[]int x_step_q4
|
; sp[]int x0_q4
|
||||||
; sp[]const int16_t *filter_y ; unused
|
; sp[]int x_step_q4 ; unused
|
||||||
; sp[]int y_step_q4 ; unused
|
; sp[]int y0_q4
|
||||||
|
; sp[]int y_step_q4 ; unused
|
||||||
; sp[]int w
|
; sp[]int w
|
||||||
; sp[]int h
|
; sp[]int h
|
||||||
|
|
||||||
@ -54,11 +55,11 @@
|
|||||||
|
|
||||||
sub r0, r0, #3 ; adjust for taps
|
sub r0, r0, #3 ; adjust for taps
|
||||||
|
|
||||||
ldr r5, [sp, #32] ; filter_x
|
ldrd r4, r5, [sp, #32] ; filter, x0_q4
|
||||||
ldr r6, [sp, #48] ; w
|
add r4, r5, lsl #4
|
||||||
ldr r7, [sp, #52] ; h
|
ldrd r6, r7, [sp, #52] ; w, h
|
||||||
|
|
||||||
vld1.s16 {q0}, [r5] ; filter_x
|
vld1.s16 {q0}, [r4] ; filter
|
||||||
|
|
||||||
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
sub r8, r1, r1, lsl #2 ; -src_stride * 3
|
||||||
add r8, r8, #4 ; -src_stride * 3 + 4
|
add r8, r8, #4 ; -src_stride * 3 + 4
|
||||||
@ -119,7 +120,7 @@ vpx_convolve8_loop_horiz
|
|||||||
|
|
||||||
pld [r5, r1, lsl #1]
|
pld [r5, r1, lsl #1]
|
||||||
|
|
||||||
; src[] * filter_x
|
; src[] * filter
|
||||||
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
|
||||||
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
|
||||||
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
|
||||||
@ -173,11 +174,13 @@ vpx_convolve8_loop_horiz
|
|||||||
sub r0, r0, r1
|
sub r0, r0, r1
|
||||||
sub r0, r0, r1, lsl #1
|
sub r0, r0, r1, lsl #1
|
||||||
|
|
||||||
ldr r4, [sp, #32] ; filter_y
|
ldr r4, [sp, #24] ; filter
|
||||||
ldr r6, [sp, #40] ; w
|
ldr r5, [sp, #36] ; y0_q4
|
||||||
ldr lr, [sp, #44] ; h
|
add r4, r5, lsl #4
|
||||||
|
ldr r6, [sp, #44] ; w
|
||||||
|
ldr lr, [sp, #48] ; h
|
||||||
|
|
||||||
vld1.s16 {q0}, [r4] ; filter_y
|
vld1.s16 {q0}, [r4] ; filter
|
||||||
|
|
||||||
lsl r1, r1, #1
|
lsl r1, r1, #1
|
||||||
lsl r3, r3, #1
|
lsl r3, r3, #1
|
||||||
@ -216,7 +219,7 @@ vpx_convolve8_loop_vert
|
|||||||
pld [r5]
|
pld [r5]
|
||||||
pld [r8]
|
pld [r8]
|
||||||
|
|
||||||
; src[] * filter_y
|
; src[] * filter
|
||||||
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
|
||||||
|
|
||||||
pld [r5, r3]
|
pld [r5, r3]
|
||||||
|
@ -15,13 +15,13 @@
|
|||||||
|
|
||||||
void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride, int w,
|
int y0_q4, int y_step_q4, int w, int h) {
|
||||||
int h) {
|
(void)filter;
|
||||||
(void)filter_x;
|
(void)x0_q4;
|
||||||
(void)filter_x_stride;
|
(void)x_step_q4;
|
||||||
(void)filter_y;
|
(void)y0_q4;
|
||||||
(void)filter_y_stride;
|
(void)y_step_q4;
|
||||||
|
|
||||||
if (w < 8) { // avg4
|
if (w < 8) { // avg4
|
||||||
uint8x8_t s0, s1;
|
uint8x8_t s0, s1;
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
|vpx_convolve_avg_neon| PROC
|
|vpx_convolve_avg_neon| PROC
|
||||||
push {r4-r6, lr}
|
push {r4-r6, lr}
|
||||||
ldrd r4, r5, [sp, #32]
|
ldrd r4, r5, [sp, #36]
|
||||||
mov r6, r2
|
mov r6, r2
|
||||||
|
|
||||||
cmp r4, #32
|
cmp r4, #32
|
||||||
|
@ -15,13 +15,14 @@
|
|||||||
|
|
||||||
void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
if (w < 8) { // copy4
|
if (w < 8) { // copy4
|
||||||
do {
|
do {
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
|vpx_convolve_copy_neon| PROC
|
|vpx_convolve_copy_neon| PROC
|
||||||
push {r4-r5, lr}
|
push {r4-r5, lr}
|
||||||
ldrd r4, r5, [sp, #28]
|
ldrd r4, r5, [sp, #32]
|
||||||
|
|
||||||
cmp r4, #32
|
cmp r4, #32
|
||||||
bgt copy64
|
bgt copy64
|
||||||
|
@ -15,8 +15,8 @@
|
|||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
|
||||||
void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
|
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
|
||||||
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
|
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
|
||||||
@ -33,19 +33,19 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
* height and filter a multiple of 4 lines. Since this goes in to the temp
|
* height and filter a multiple of 4 lines. Since this goes in to the temp
|
||||||
* buffer which has lots of extra room and is subsequently discarded this is
|
* buffer which has lots of extra room and is subsequently discarded this is
|
||||||
* safe if somewhat less than ideal. */
|
* safe if somewhat less than ideal. */
|
||||||
vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter_x,
|
vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w,
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w,
|
||||||
intermediate_height);
|
intermediate_height);
|
||||||
|
|
||||||
/* Step into the temp buffer 3 lines to get the actual frame data */
|
/* Step into the temp buffer 3 lines to get the actual frame data */
|
||||||
vpx_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
uint8_t temp[64 * 72];
|
uint8_t temp[64 * 72];
|
||||||
const int intermediate_height = h + 7;
|
const int intermediate_height = h + 7;
|
||||||
@ -56,9 +56,9 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
/* This implementation has the same issues as above. In addition, we only want
|
/* This implementation has the same issues as above. In addition, we only want
|
||||||
* to average the values after both passes.
|
* to average the values after both passes.
|
||||||
*/
|
*/
|
||||||
vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter_x,
|
vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w,
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w,
|
||||||
intermediate_height);
|
intermediate_height);
|
||||||
vpx_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
@ -219,9 +219,10 @@ static void convolve_bi_avg_vert_64_dspr2(const uint8_t *src,
|
|||||||
|
|
||||||
void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
@ -247,8 +248,8 @@ void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
h);
|
h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -751,9 +751,10 @@ static void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr,
|
|||||||
|
|
||||||
void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
@ -793,8 +794,8 @@ void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
h);
|
h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -628,9 +628,10 @@ static void convolve_bi_horiz_64_dspr2(const uint8_t *src_ptr,
|
|||||||
|
|
||||||
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
@ -672,8 +673,8 @@ void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
(int32_t)dst_stride, filter_x, (int32_t)h);
|
(int32_t)dst_stride, filter_x, (int32_t)h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -201,9 +201,10 @@ static void convolve_bi_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
@ -228,8 +229,8 @@ void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
|
convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -334,15 +334,16 @@ static void convolve_avg_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
assert(((const int32_t *)filter_y)[1] != 0x800000);
|
assert(((const int32_t *)filter_y)[1] != 0x800000);
|
||||||
|
|
||||||
if (((const int32_t *)filter_y)[0] == 0) {
|
if (((const int32_t *)filter_y)[0] == 0) {
|
||||||
vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
@ -367,8 +368,8 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
h);
|
h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -376,8 +377,8 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
/* Fixed size intermediate buffer places limits on parameters. */
|
/* Fixed size intermediate buffer places limits on parameters. */
|
||||||
DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
|
DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
|
||||||
@ -390,24 +391,26 @@ void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
if (intermediate_height < h) intermediate_height = h;
|
if (intermediate_height < h) intermediate_height = h;
|
||||||
|
|
||||||
vpx_convolve8_horiz(src - (src_stride * 3), src_stride, temp, 64, filter_x,
|
vpx_convolve8_horiz(src - (src_stride * 3), src_stride, temp, 64, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, intermediate_height);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w,
|
||||||
|
intermediate_height);
|
||||||
|
|
||||||
vpx_convolve8_avg_vert(temp + 64 * 3, 64, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_vert(temp + 64 * 3, 64, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
int x, y;
|
int x, y;
|
||||||
uint32_t tp1, tp2, tn1, tp3, tp4, tn2;
|
uint32_t tp1, tp2, tn1, tp3, tp4, tn2;
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
/* prefetch data to cache memory */
|
/* prefetch data to cache memory */
|
||||||
prefetch_load(src);
|
prefetch_load(src);
|
||||||
|
@ -938,15 +938,16 @@ static void convolve_avg_horiz_64_dspr2(const uint8_t *src_ptr,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
assert(((const int32_t *)filter_x)[1] != 0x800000);
|
assert(((const int32_t *)filter_x)[1] != 0x800000);
|
||||||
|
|
||||||
if (((const int32_t *)filter_x)[0] == 0) {
|
if (((const int32_t *)filter_x)[0] == 0) {
|
||||||
vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
@ -987,9 +988,8 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
h);
|
h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_horiz_c(src + 3, src_stride, dst, dst_stride,
|
vpx_convolve8_avg_horiz_c(src + 3, src_stride, dst, dst_stride, filter,
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
h);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1296,9 +1296,11 @@ void copy_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
|
DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
|
||||||
int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
|
int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
@ -1395,14 +1397,15 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
|
|
||||||
void vpx_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int w, int h) {
|
int h) {
|
||||||
int x, y;
|
int x, y;
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
/* prefetch data to cache memory */
|
/* prefetch data to cache memory */
|
||||||
prefetch_load(src);
|
prefetch_load(src);
|
||||||
|
@ -818,15 +818,16 @@ static void convolve_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
assert(((const int32_t *)filter_x)[1] != 0x800000);
|
assert(((const int32_t *)filter_x)[1] != 0x800000);
|
||||||
|
|
||||||
if (((const int32_t *)filter_x)[0] == 0) {
|
if (((const int32_t *)filter_x)[0] == 0) {
|
||||||
vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
@ -868,8 +869,8 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
(int32_t)dst_stride, filter_x, (int32_t)h);
|
(int32_t)dst_stride, filter_x, (int32_t)h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -318,15 +318,16 @@ static void convolve_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
assert(((const int32_t *)filter_y)[1] != 0x800000);
|
assert(((const int32_t *)filter_y)[1] != 0x800000);
|
||||||
|
|
||||||
if (((const int32_t *)filter_y)[0] == 0) {
|
if (((const int32_t *)filter_y)[0] == 0) {
|
||||||
vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
uint32_t pos = 38;
|
uint32_t pos = 38;
|
||||||
|
|
||||||
@ -349,8 +350,8 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
|
convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,21 +24,21 @@ extern "C" {
|
|||||||
#if HAVE_DSPR2
|
#if HAVE_DSPR2
|
||||||
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h);
|
int w, int h);
|
||||||
|
|
||||||
void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h);
|
int w, int h);
|
||||||
|
|
||||||
void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h);
|
int w, int h);
|
||||||
|
|
||||||
void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter, int w,
|
ptrdiff_t dst_stride, const int16_t *filter, int w,
|
||||||
@ -46,9 +46,9 @@ void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
|
|
||||||
void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h);
|
int w, int h);
|
||||||
|
|
||||||
#endif // #if HAVE_DSPR2
|
#endif // #if HAVE_DSPR2
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -633,9 +633,10 @@ static void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
int8_t cnt, filt_hor[8];
|
int8_t cnt, filt_hor[8];
|
||||||
|
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
@ -668,8 +669,8 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
(int32_t)dst_stride, &filt_hor[3], h);
|
(int32_t)dst_stride, &filt_hor[3], h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -695,8 +696,8 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
(int32_t)dst_stride, filt_hor, h);
|
(int32_t)dst_stride, filt_hor, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -516,9 +516,10 @@ static void common_hv_2ht_2vt_and_aver_dst_64w_msa(
|
|||||||
|
|
||||||
void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int y0_q4, int y_step_q4, int w, int h) {
|
||||||
int h) {
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
int8_t cnt, filt_hor[8], filt_ver[8];
|
int8_t cnt, filt_hor[8], filt_ver[8];
|
||||||
|
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
@ -560,14 +561,14 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
&filt_hor[3], &filt_ver[3], h);
|
&filt_hor[3], &filt_ver[3], h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (((const int32_t *)filter_x)[0] == 0 ||
|
} else if (((const int32_t *)filter_x)[0] == 0 ||
|
||||||
((const int32_t *)filter_y)[0] == 0) {
|
((const int32_t *)filter_y)[0] == 0) {
|
||||||
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
switch (w) {
|
switch (w) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -596,8 +597,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
filt_ver, h);
|
filt_ver, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -605,9 +605,10 @@ static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src,
|
|||||||
|
|
||||||
void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
int8_t cnt, filt_ver[8];
|
int8_t cnt, filt_ver[8];
|
||||||
|
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
@ -640,8 +641,8 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
(int32_t)dst_stride, &filt_ver[3], h);
|
(int32_t)dst_stride, &filt_ver[3], h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -668,8 +669,8 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
(int32_t)dst_stride, filt_ver, h);
|
(int32_t)dst_stride, filt_ver, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -621,9 +621,10 @@ static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
int8_t cnt, filt_hor[8];
|
int8_t cnt, filt_hor[8];
|
||||||
|
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
@ -656,8 +657,8 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
&filt_hor[3], h);
|
&filt_hor[3], h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -683,8 +684,8 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
filt_hor, h);
|
filt_hor, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -541,9 +541,11 @@ static void common_hv_2ht_2vt_64w_msa(const uint8_t *src, int32_t src_stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int32_t x_step_q4, const int16_t *filter_y,
|
int x0_q4, int32_t x_step_q4, int y0_q4,
|
||||||
int32_t y_step_q4, int32_t w, int32_t h) {
|
int32_t y_step_q4, int32_t w, int32_t h) {
|
||||||
|
const int16_t *const filter_x = filter[x0_q4];
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
int8_t cnt, filt_hor[8], filt_ver[8];
|
int8_t cnt, filt_hor[8], filt_ver[8];
|
||||||
|
|
||||||
assert(x_step_q4 == 16);
|
assert(x_step_q4 == 16);
|
||||||
@ -585,14 +587,14 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
&filt_ver[3], (int32_t)h);
|
&filt_ver[3], (int32_t)h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (((const int32_t *)filter_x)[0] == 0 ||
|
} else if (((const int32_t *)filter_x)[0] == 0 ||
|
||||||
((const int32_t *)filter_y)[0] == 0) {
|
((const int32_t *)filter_y)[0] == 0) {
|
||||||
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
switch (w) {
|
switch (w) {
|
||||||
case 4:
|
case 4:
|
||||||
@ -621,8 +623,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
(int32_t)h);
|
(int32_t)h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -628,9 +628,10 @@ static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
|
const int16_t *const filter_y = filter[y0_q4];
|
||||||
int8_t cnt, filt_ver[8];
|
int8_t cnt, filt_ver[8];
|
||||||
|
|
||||||
assert(y_step_q4 == 16);
|
assert(y_step_q4 == 16);
|
||||||
@ -663,8 +664,8 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
&filt_ver[3], h);
|
&filt_ver[3], h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -690,8 +691,8 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
filt_ver, h);
|
filt_ver, h);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -189,13 +189,14 @@ static void avg_width64_msa(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int32_t filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int32_t filter_y_stride,
|
int32_t x_step_q4, int y0_q4, int32_t y_step_q4,
|
||||||
int32_t w, int32_t h) {
|
int32_t w, int32_t h) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_y;
|
(void)x0_q4;
|
||||||
(void)filter_x_stride;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
switch (w) {
|
switch (w) {
|
||||||
case 4: {
|
case 4: {
|
||||||
|
@ -199,13 +199,14 @@ static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int32_t filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int32_t filter_y_stride,
|
int32_t x_step_q4, int y0_q4, int32_t y_step_q4,
|
||||||
int32_t w, int32_t h) {
|
int32_t w, int32_t h) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_y;
|
(void)x0_q4;
|
||||||
(void)filter_x_stride;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
switch (w) {
|
switch (w) {
|
||||||
case 4: {
|
case 4: {
|
||||||
|
@ -53,13 +53,13 @@ static inline void copy_w64(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve_copy_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_copy_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int32_t filter_x_stride,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int32_t filter_y_stride,
|
int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) {
|
||||||
int32_t w, int32_t h) {
|
(void)filter;
|
||||||
(void)filter_x;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_x_stride;
|
(void)y0_q4;
|
||||||
(void)filter_y_stride;
|
(void)y_step_q4;
|
||||||
|
|
||||||
switch (w) {
|
switch (w) {
|
||||||
case 16: {
|
case 16: {
|
||||||
@ -132,14 +132,8 @@ static inline void avg_w64(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int32_t filter_x_stride,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int32_t filter_y_stride,
|
int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) {
|
||||||
int32_t w, int32_t h) {
|
|
||||||
(void)filter_x;
|
|
||||||
(void)filter_y;
|
|
||||||
(void)filter_x_stride;
|
|
||||||
(void)filter_y_stride;
|
|
||||||
|
|
||||||
switch (w) {
|
switch (w) {
|
||||||
case 16: {
|
case 16: {
|
||||||
avg_w16(src, src_stride, dst, dst_stride, h);
|
avg_w16(src, src_stride, dst, dst_stride, h);
|
||||||
@ -154,8 +148,8 @@ void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
vpx_convolve_avg_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_x_stride, filter_y, filter_y_stride, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -299,9 +293,9 @@ static inline void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
static inline void convolve(const uint8_t *src, ptrdiff_t src_stride,
|
static inline void convolve(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *const x_filters, int x0_q4,
|
const InterpKernel *const filter, int x0_q4,
|
||||||
int x_step_q4, const InterpKernel *const y_filters,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int y0_q4, int y_step_q4, int w, int h) {
|
int h) {
|
||||||
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
||||||
// 2d filtering proceeds in 2 steps:
|
// 2d filtering proceeds in 2 steps:
|
||||||
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
||||||
@ -324,95 +318,77 @@ static inline void convolve(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
assert(x_step_q4 <= 32);
|
assert(x_step_q4 <= 32);
|
||||||
|
|
||||||
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
|
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
|
||||||
x_filters, x0_q4, x_step_q4, w, intermediate_height);
|
filter, x0_q4, x_step_q4, w, intermediate_height);
|
||||||
convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
|
convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
|
||||||
y_filters, y0_q4, y_step_q4, w, h);
|
y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
(void)y0_q4;
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
|
||||||
|
|
||||||
(void)filter_y;
|
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
|
|
||||||
convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
|
convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
|
||||||
w, h);
|
h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
(void)y0_q4;
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
|
||||||
|
|
||||||
(void)filter_y;
|
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
|
|
||||||
convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
|
convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
|
||||||
x_step_q4, w, h);
|
w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_vert_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_vert_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
(void)x0_q4;
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
(void)filter_x;
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
|
||||||
convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
|
convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w,
|
||||||
w, h);
|
h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_vert_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_vert_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
(void)x0_q4;
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
(void)filter_x;
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
|
||||||
convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
|
convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4,
|
||||||
y_step_q4, w, h);
|
w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve8_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4,
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
y_step_q4, w, h);
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
|
|
||||||
filters_y, y0_q4, y_step_q4, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int y0_q4, int y_step_q4, int w, int h) {
|
||||||
int h) {
|
|
||||||
// Fixed size intermediate buffer places limits on parameters.
|
// Fixed size intermediate buffer places limits on parameters.
|
||||||
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
|
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
|
||||||
assert(w <= 64);
|
assert(w <= 64);
|
||||||
assert(h <= 64);
|
assert(h <= 64);
|
||||||
|
|
||||||
vpx_convolve8_vsx(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
|
vpx_convolve8_vsx(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
|
||||||
y_step_q4, w, h);
|
y_step_q4, w, h);
|
||||||
vpx_convolve_avg_vsx(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
|
vpx_convolve_avg_vsx(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
|
||||||
}
|
}
|
||||||
|
@ -114,10 +114,9 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const InterpKernel *const x_filters,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x0_q4, int x_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
const InterpKernel *const y_filters, int y0_q4,
|
int h) {
|
||||||
int y_step_q4, int w, int h) {
|
|
||||||
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
||||||
// 2d filtering proceeds in 2 steps:
|
// 2d filtering proceeds in 2 steps:
|
||||||
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
||||||
@ -140,108 +139,86 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
assert(x_step_q4 <= 32);
|
assert(x_step_q4 <= 32);
|
||||||
|
|
||||||
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
|
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
|
||||||
x_filters, x0_q4, x_step_q4, w, intermediate_height);
|
filter, x0_q4, x_step_q4, w, intermediate_height);
|
||||||
convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
|
convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
|
||||||
y_filters, y0_q4, y_step_q4, w, h);
|
y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int y0_q4, int y_step_q4, int w, int h) {
|
||||||
int h) {
|
(void)y0_q4;
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
|
||||||
|
|
||||||
(void)filter_y;
|
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
|
convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
|
||||||
convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
|
h);
|
||||||
w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
(void)y0_q4;
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
|
||||||
|
|
||||||
(void)filter_y;
|
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
|
convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
|
||||||
convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
|
w, h);
|
||||||
x_step_q4, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int y0_q4, int y_step_q4, int w, int h) {
|
||||||
int h) {
|
(void)x0_q4;
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
(void)filter_x;
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w,
|
||||||
convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
|
h);
|
||||||
w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
(void)x0_q4;
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
(void)filter_x;
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4,
|
||||||
convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
|
w, h);
|
||||||
y_step_q4, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int w, int h) {
|
int h) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4,
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
y_step_q4, w, h);
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
|
|
||||||
filters_y, y0_q4, y_step_q4, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
// Fixed size intermediate buffer places limits on parameters.
|
// Fixed size intermediate buffer places limits on parameters.
|
||||||
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
|
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
|
||||||
assert(w <= 64);
|
assert(w <= 64);
|
||||||
assert(h <= 64);
|
assert(h <= 64);
|
||||||
|
|
||||||
vpx_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
|
vpx_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
|
||||||
y_step_q4, w, h);
|
y_step_q4, w, h);
|
||||||
vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
|
vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int filter_x_stride, const int16_t *filter_y,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int filter_y_stride, int w, int h) {
|
int w, int h) {
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
for (r = h; r > 0; --r) {
|
for (r = h; r > 0; --r) {
|
||||||
memcpy(dst, src, w);
|
memcpy(dst, src, w);
|
||||||
@ -251,15 +228,16 @@ void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int filter_x_stride, const int16_t *filter_y,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int filter_y_stride, int w, int h) {
|
int w, int h) {
|
||||||
int x, y;
|
int x, y;
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
|
|
||||||
for (y = 0; y < h; ++y) {
|
for (y = 0; y < h; ++y) {
|
||||||
for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
|
for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
|
||||||
@ -269,53 +247,52 @@ void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int w, int h) {
|
int h) {
|
||||||
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h) {
|
int h) {
|
||||||
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int y0_q4, int y_step_q4, int w, int h) {
|
||||||
int h) {
|
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
x_step_q4, filter_y, y_step_q4, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
|
vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
filter_y, y_step_q4, w, h);
|
x_step_q4, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
@ -417,9 +394,9 @@ static void highbd_convolve_avg_vert(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
|
static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *const x_filters, int x0_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
int x_step_q4, const InterpKernel *const y_filters,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int y0_q4, int y_step_q4, int w, int h, int bd) {
|
int h, int bd) {
|
||||||
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
||||||
// 2d filtering proceeds in 2 steps:
|
// 2d filtering proceeds in 2 steps:
|
||||||
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
||||||
@ -442,113 +419,97 @@ static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
assert(x_step_q4 <= 32);
|
assert(x_step_q4 <= 32);
|
||||||
|
|
||||||
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
|
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
|
||||||
temp, 64, x_filters, x0_q4, x_step_q4, w,
|
temp, 64, filter, x0_q4, x_step_q4, w,
|
||||||
intermediate_height, bd);
|
intermediate_height, bd);
|
||||||
highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
|
highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
|
||||||
y_filters, y0_q4, y_step_q4, w, h, bd);
|
filter, y0_q4, y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
(void)y0_q4;
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
|
||||||
|
|
||||||
(void)filter_y;
|
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
|
|
||||||
highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
|
highbd_convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, w, h, bd);
|
x_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
(void)y0_q4;
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
|
||||||
|
|
||||||
(void)filter_y;
|
|
||||||
(void)y_step_q4;
|
(void)y_step_q4;
|
||||||
|
|
||||||
highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
|
highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
|
||||||
x_step_q4, w, h, bd);
|
x_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
(void)x0_q4;
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
(void)filter_x;
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
|
||||||
highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
|
highbd_convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
|
||||||
y_step_q4, w, h, bd);
|
y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
(void)x0_q4;
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
(void)filter_x;
|
|
||||||
(void)x_step_q4;
|
(void)x_step_q4;
|
||||||
|
|
||||||
highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
|
highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
|
||||||
y_step_q4, w, h, bd);
|
y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
highbd_convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
y0_q4, y_step_q4, w, h, bd);
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
|
|
||||||
filters_y, y0_q4, y_step_q4, w, h, bd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
// Fixed size intermediate buffer places limits on parameters.
|
// Fixed size intermediate buffer places limits on parameters.
|
||||||
DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
|
DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
|
||||||
assert(w <= 64);
|
assert(w <= 64);
|
||||||
assert(h <= 64);
|
assert(h <= 64);
|
||||||
|
|
||||||
vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4,
|
vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4,
|
||||||
filter_y, y_step_q4, w, h, bd);
|
y0_q4, y_step_q4, w, h, bd);
|
||||||
vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h,
|
vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h,
|
||||||
bd);
|
bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int w, int h, int bd) {
|
int h, int bd) {
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
for (r = h; r > 0; --r) {
|
for (r = h; r > 0; --r) {
|
||||||
@ -560,15 +521,16 @@ void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int w, int h, int bd) {
|
int h, int bd) {
|
||||||
int x, y;
|
int x, y;
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_x_stride;
|
(void)x0_q4;
|
||||||
(void)filter_y;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
for (y = 0; y < h; ++y) {
|
for (y = 0; y < h; ++y) {
|
||||||
|
@ -19,15 +19,15 @@ extern "C" {
|
|||||||
|
|
||||||
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int h);
|
int h);
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
typedef void (*highbd_convolve_fn_t)(const uint16_t *src, ptrdiff_t src_stride,
|
typedef void (*highbd_convolve_fn_t)(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h, int bd);
|
int w, int h, int bd);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ print <<EOF
|
|||||||
|
|
||||||
#include "vpx/vpx_integer.h"
|
#include "vpx/vpx_integer.h"
|
||||||
#include "vpx_dsp/vpx_dsp_common.h"
|
#include "vpx_dsp/vpx_dsp_common.h"
|
||||||
|
#include "vpx_dsp/vpx_filter.h"
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
@ -331,69 +332,69 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
#
|
#
|
||||||
# Sub Pixel Filters
|
# Sub Pixel Filters
|
||||||
#
|
#
|
||||||
add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/;
|
specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/;
|
specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx/;
|
specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
|
specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
|
specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa vsx/;
|
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa vsx/;
|
specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa vsx/;
|
specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa vsx/;
|
||||||
|
|
||||||
add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
specialize qw/vpx_scaled_2d ssse3/;
|
specialize qw/vpx_scaled_2d ssse3/;
|
||||||
|
|
||||||
add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
|
|
||||||
add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
|
|
||||||
add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
|
|
||||||
add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
|
|
||||||
add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
|
add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
|
||||||
|
|
||||||
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||||
#
|
#
|
||||||
# Sub Pixel Filters
|
# Sub Pixel Filters
|
||||||
#
|
#
|
||||||
add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
|
specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
|
specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8 avx2 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8 avx2 neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_horiz avx2 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_horiz avx2 neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_vert avx2 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_vert avx2 neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_avg avx2 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_avg avx2 neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_avg_vert avx2 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_avg_vert avx2 neon/, "$sse2_x86_64";
|
||||||
} # CONFIG_VP9_HIGHBITDEPTH
|
} # CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
@ -26,17 +26,6 @@ extern "C" {
|
|||||||
|
|
||||||
typedef int16_t InterpKernel[SUBPEL_TAPS];
|
typedef int16_t InterpKernel[SUBPEL_TAPS];
|
||||||
|
|
||||||
static INLINE const InterpKernel *get_filter_base(const int16_t *filter) {
|
|
||||||
// NOTE: This assumes that the filter table is 256-byte aligned.
|
|
||||||
// TODO(agrange) Modify to make independent of table alignment.
|
|
||||||
return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
|
|
||||||
}
|
|
||||||
|
|
||||||
static INLINE int get_filter_offset(const int16_t *f,
|
|
||||||
const InterpKernel *base) {
|
|
||||||
return (int)((const InterpKernel *)(intptr_t)f - base);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
#endif
|
#endif
|
||||||
|
@ -20,14 +20,15 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
|
|||||||
uint8_t *output_ptr, ptrdiff_t out_pitch,
|
uint8_t *output_ptr, ptrdiff_t out_pitch,
|
||||||
uint32_t output_height, const int16_t *filter);
|
uint32_t output_height, const int16_t *filter);
|
||||||
|
|
||||||
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
|
#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
|
||||||
void vpx_convolve8_##name##_##opt( \
|
void vpx_convolve8_##name##_##opt( \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \
|
||||||
const int16_t *filter_y, int y_step_q4, int w, int h) { \
|
int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \
|
||||||
(void)filter_x; \
|
const int16_t *filter = filter_kernel[offset]; \
|
||||||
|
(void)x0_q4; \
|
||||||
(void)x_step_q4; \
|
(void)x_step_q4; \
|
||||||
(void)filter_y; \
|
(void)y0_q4; \
|
||||||
(void)y_step_q4; \
|
(void)y_step_q4; \
|
||||||
assert(filter[3] != 128); \
|
assert(filter[3] != 128); \
|
||||||
assert(step_q4 == 16); \
|
assert(step_q4 == 16); \
|
||||||
@ -64,32 +65,36 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FUN_CONV_2D(avg, opt) \
|
#define FUN_CONV_2D(avg, opt) \
|
||||||
void vpx_convolve8_##avg##opt( \
|
void vpx_convolve8_##avg##opt( \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
|
||||||
const int16_t *filter_y, int y_step_q4, int w, int h) { \
|
int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \
|
||||||
assert(filter_x[3] != 128); \
|
const int16_t *filter_x = filter[x0_q4]; \
|
||||||
assert(filter_y[3] != 128); \
|
const int16_t *filter_y = filter[y0_q4]; \
|
||||||
assert(w <= 64); \
|
(void)filter_y; \
|
||||||
assert(h <= 64); \
|
assert(filter_x[3] != 128); \
|
||||||
assert(x_step_q4 == 16); \
|
assert(filter_y[3] != 128); \
|
||||||
assert(y_step_q4 == 16); \
|
assert(w <= 64); \
|
||||||
if (filter_x[0] | filter_x[1] | filter_x[2]) { \
|
assert(h <= 64); \
|
||||||
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
|
assert(x_step_q4 == 16); \
|
||||||
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
|
assert(y_step_q4 == 16); \
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, w, \
|
if (filter_x[0] | filter_x[1] | filter_x[2]) { \
|
||||||
h + 7); \
|
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
|
||||||
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
|
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
|
||||||
filter_x, x_step_q4, filter_y, \
|
filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
|
||||||
y_step_q4, w, h); \
|
h + 7); \
|
||||||
} else { \
|
vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
|
||||||
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
|
filter, x0_q4, x_step_q4, y0_q4, \
|
||||||
vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter_x, \
|
y_step_q4, w, h); \
|
||||||
x_step_q4, filter_y, y_step_q4, w, h + 1); \
|
} else { \
|
||||||
vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter_x, \
|
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
|
||||||
x_step_q4, filter_y, y_step_q4, w, h); \
|
vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \
|
||||||
} \
|
x_step_q4, y0_q4, y_step_q4, w, h + 1); \
|
||||||
|
vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \
|
||||||
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
|
||||||
|
h); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
@ -101,95 +106,97 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
|
|||||||
unsigned int output_height,
|
unsigned int output_height,
|
||||||
const int16_t *filter, int bd);
|
const int16_t *filter, int bd);
|
||||||
|
|
||||||
#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
|
#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
|
||||||
void vpx_highbd_convolve8_##name##_##opt( \
|
void vpx_highbd_convolve8_##name##_##opt( \
|
||||||
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
|
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \
|
||||||
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
|
int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \
|
||||||
if (step_q4 == 16 && filter[3] != 128) { \
|
const int16_t *filter = filter_kernel[offset]; \
|
||||||
if (filter[0] | filter[1] | filter[2]) { \
|
if (step_q4 == 16 && filter[3] != 128) { \
|
||||||
while (w >= 16) { \
|
if (filter[0] | filter[1] | filter[2]) { \
|
||||||
vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
|
while (w >= 16) { \
|
||||||
src_start, src_stride, dst, dst_stride, h, filter, bd); \
|
vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
|
||||||
src += 16; \
|
src_start, src_stride, dst, dst_stride, h, filter, bd); \
|
||||||
dst += 16; \
|
src += 16; \
|
||||||
w -= 16; \
|
dst += 16; \
|
||||||
} \
|
w -= 16; \
|
||||||
while (w >= 8) { \
|
} \
|
||||||
vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \
|
while (w >= 8) { \
|
||||||
src_start, src_stride, dst, dst_stride, h, filter, bd); \
|
vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \
|
||||||
src += 8; \
|
src_start, src_stride, dst, dst_stride, h, filter, bd); \
|
||||||
dst += 8; \
|
src += 8; \
|
||||||
w -= 8; \
|
dst += 8; \
|
||||||
} \
|
w -= 8; \
|
||||||
while (w >= 4) { \
|
} \
|
||||||
vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \
|
while (w >= 4) { \
|
||||||
src_start, src_stride, dst, dst_stride, h, filter, bd); \
|
vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \
|
||||||
src += 4; \
|
src_start, src_stride, dst, dst_stride, h, filter, bd); \
|
||||||
dst += 4; \
|
src += 4; \
|
||||||
w -= 4; \
|
dst += 4; \
|
||||||
} \
|
w -= 4; \
|
||||||
} else { \
|
} \
|
||||||
while (w >= 16) { \
|
} else { \
|
||||||
vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \
|
while (w >= 16) { \
|
||||||
src, src_stride, dst, dst_stride, h, filter, bd); \
|
vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \
|
||||||
src += 16; \
|
src, src_stride, dst, dst_stride, h, filter, bd); \
|
||||||
dst += 16; \
|
src += 16; \
|
||||||
w -= 16; \
|
dst += 16; \
|
||||||
} \
|
w -= 16; \
|
||||||
while (w >= 8) { \
|
} \
|
||||||
vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \
|
while (w >= 8) { \
|
||||||
src, src_stride, dst, dst_stride, h, filter, bd); \
|
vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \
|
||||||
src += 8; \
|
src, src_stride, dst, dst_stride, h, filter, bd); \
|
||||||
dst += 8; \
|
src += 8; \
|
||||||
w -= 8; \
|
dst += 8; \
|
||||||
} \
|
w -= 8; \
|
||||||
while (w >= 4) { \
|
} \
|
||||||
vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \
|
while (w >= 4) { \
|
||||||
src, src_stride, dst, dst_stride, h, filter, bd); \
|
vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \
|
||||||
src += 4; \
|
src, src_stride, dst, dst_stride, h, filter, bd); \
|
||||||
dst += 4; \
|
src += 4; \
|
||||||
w -= 4; \
|
dst += 4; \
|
||||||
} \
|
w -= 4; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
if (w) { \
|
} \
|
||||||
vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
|
if (w) { \
|
||||||
filter_x, x_step_q4, filter_y, \
|
vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
|
||||||
y_step_q4, w, h, bd); \
|
filter_kernel, x0_q4, x_step_q4, y0_q4, \
|
||||||
} \
|
y_step_q4, w, h, bd); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HIGH_FUN_CONV_2D(avg, opt) \
|
#define HIGH_FUN_CONV_2D(avg, opt) \
|
||||||
void vpx_highbd_convolve8_##avg##opt( \
|
void vpx_highbd_convolve8_##avg##opt( \
|
||||||
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
|
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
|
||||||
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
|
int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \
|
||||||
assert(w <= 64); \
|
const int16_t *filter_x = filter[x0_q4]; \
|
||||||
assert(h <= 64); \
|
assert(w <= 64); \
|
||||||
if (x_step_q4 == 16 && y_step_q4 == 16) { \
|
assert(h <= 64); \
|
||||||
if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
|
if (x_step_q4 == 16 && y_step_q4 == 16) { \
|
||||||
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
|
if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
|
||||||
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
|
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
|
||||||
fdata2, 64, filter_x, x_step_q4, \
|
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
|
||||||
filter_y, y_step_q4, w, h + 7, bd); \
|
fdata2, 64, filter, x0_q4, x_step_q4, \
|
||||||
vpx_highbd_convolve8_##avg##vert_##opt( \
|
y0_q4, y_step_q4, w, h + 7, bd); \
|
||||||
fdata2 + 192, 64, dst, dst_stride, filter_x, x_step_q4, filter_y, \
|
vpx_highbd_convolve8_##avg##vert_##opt( \
|
||||||
y_step_q4, w, h, bd); \
|
fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \
|
||||||
} else { \
|
y0_q4, y_step_q4, w, h, bd); \
|
||||||
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
|
} else { \
|
||||||
vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
|
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
|
||||||
filter_x, x_step_q4, filter_y, \
|
vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \
|
||||||
y_step_q4, w, h + 1, bd); \
|
x0_q4, x_step_q4, y0_q4, y_step_q4, \
|
||||||
vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
|
w, h + 1, bd); \
|
||||||
filter_x, x_step_q4, filter_y, \
|
vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
|
||||||
y_step_q4, w, h, bd); \
|
filter, x0_q4, x_step_q4, \
|
||||||
} \
|
y0_q4, y_step_q4, w, h, bd); \
|
||||||
} else { \
|
} \
|
||||||
vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
|
} else { \
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, \
|
vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \
|
||||||
w, h, bd); \
|
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \
|
||||||
} \
|
bd); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
@ -18,13 +18,14 @@
|
|||||||
|
|
||||||
void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int width, int h, int bd) {
|
int width, int h, int bd) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_y;
|
(void)x0_q4;
|
||||||
(void)filter_x_stride;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
assert(width % 4 == 0);
|
assert(width % 4 == 0);
|
||||||
@ -99,13 +100,14 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint16_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const InterpKernel *filter, int x0_q4,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int width, int h, int bd) {
|
int width, int h, int bd) {
|
||||||
(void)filter_x;
|
(void)filter;
|
||||||
(void)filter_y;
|
(void)x0_q4;
|
||||||
(void)filter_x_stride;
|
(void)x_step_q4;
|
||||||
(void)filter_y_stride;
|
(void)y0_q4;
|
||||||
|
(void)y_step_q4;
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
assert(width % 4 == 0);
|
assert(width % 4 == 0);
|
||||||
@ -1073,8 +1075,8 @@ void vpx_highbd_filter_block1d4_v2_sse2(const uint16_t *, ptrdiff_t, uint16_t *,
|
|||||||
#define vpx_highbd_filter_block1d4_v8_avx2 vpx_highbd_filter_block1d4_v8_sse2
|
#define vpx_highbd_filter_block1d4_v8_avx2 vpx_highbd_filter_block1d4_v8_sse2
|
||||||
#define vpx_highbd_filter_block1d4_v2_avx2 vpx_highbd_filter_block1d4_v2_sse2
|
#define vpx_highbd_filter_block1d4_v2_avx2 vpx_highbd_filter_block1d4_v2_sse2
|
||||||
|
|
||||||
HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
|
HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2);
|
||||||
HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
|
HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , avx2);
|
||||||
HIGH_FUN_CONV_2D(, avx2);
|
HIGH_FUN_CONV_2D(, avx2);
|
||||||
|
|
||||||
void vpx_highbd_filter_block1d4_h8_avg_sse2(const uint16_t *, ptrdiff_t,
|
void vpx_highbd_filter_block1d4_h8_avg_sse2(const uint16_t *, ptrdiff_t,
|
||||||
@ -1098,8 +1100,8 @@ void vpx_highbd_filter_block1d4_v2_avg_sse2(const uint16_t *, ptrdiff_t,
|
|||||||
#define vpx_highbd_filter_block1d4_v2_avg_avx2 \
|
#define vpx_highbd_filter_block1d4_v2_avg_avx2 \
|
||||||
vpx_highbd_filter_block1d4_v2_avg_sse2
|
vpx_highbd_filter_block1d4_v2_avg_sse2
|
||||||
|
|
||||||
HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, avx2);
|
HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2);
|
||||||
HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
|
HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
|
||||||
avx2);
|
avx2);
|
||||||
HIGH_FUN_CONV_2D(avg_, avx2);
|
HIGH_FUN_CONV_2D(avg_, avx2);
|
||||||
|
|
||||||
|
@ -41,38 +41,38 @@ filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2;
|
|||||||
|
|
||||||
// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4,
|
||||||
// int w, int h);
|
// int y_step_q4, int w, int h);
|
||||||
// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
|
FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
|
||||||
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
|
FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
|
||||||
FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
|
FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
|
||||||
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
|
FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, sse2);
|
||||||
|
|
||||||
// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
FUN_CONV_2D(, sse2);
|
FUN_CONV_2D(, sse2);
|
||||||
FUN_CONV_2D(avg_, sse2);
|
FUN_CONV_2D(avg_, sse2);
|
||||||
@ -140,22 +140,22 @@ highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
|
|||||||
// const int16_t *filter_y,
|
// const int16_t *filter_y,
|
||||||
// int y_step_q4,
|
// int y_step_q4,
|
||||||
// int w, int h, int bd);
|
// int w, int h, int bd);
|
||||||
HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
|
HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
|
||||||
HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
|
HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
|
||||||
HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
|
HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
|
||||||
HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
|
HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
|
||||||
sse2);
|
sse2);
|
||||||
|
|
||||||
// void vpx_highbd_convolve8_sse2(const uint16_t *src, ptrdiff_t src_stride,
|
// void vpx_highbd_convolve8_sse2(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
// uint16_t *dst, ptrdiff_t dst_stride,
|
// uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h, int bd);
|
// int w, int h, int bd);
|
||||||
// void vpx_highbd_convolve8_avg_sse2(const uint16_t *src, ptrdiff_t src_stride,
|
// void vpx_highbd_convolve8_avg_sse2(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
// uint16_t *dst, ptrdiff_t dst_stride,
|
// uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4,
|
||||||
// int w, int h, int bd);
|
// int y_step_q4, int w, int h, int bd);
|
||||||
HIGH_FUN_CONV_2D(, sse2);
|
HIGH_FUN_CONV_2D(, sse2);
|
||||||
HIGH_FUN_CONV_2D(avg_, sse2);
|
HIGH_FUN_CONV_2D(avg_, sse2);
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||||
|
@ -20,14 +20,14 @@ SECTION .text
|
|||||||
%endif
|
%endif
|
||||||
%ifidn %2, highbd
|
%ifidn %2, highbd
|
||||||
%define pavg pavgw
|
%define pavg pavgw
|
||||||
cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
|
cglobal %2_convolve_%1, 4, 8, 4+AUX_XMM_REGS, src, src_stride, \
|
||||||
dst, dst_stride, \
|
dst, dst_stride, \
|
||||||
fx, fxs, fy, fys, w, h, bd
|
f, fxo, fxs, fyo, fys, w, h, bd
|
||||||
%else
|
%else
|
||||||
%define pavg pavgb
|
%define pavg pavgb
|
||||||
cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
|
cglobal convolve_%1, 4, 8, 4+AUX_XMM_REGS, src, src_stride, \
|
||||||
dst, dst_stride, \
|
dst, dst_stride, \
|
||||||
fx, fxs, fy, fys, w, h
|
f, fxo, fxs, fyo, fys, w, h
|
||||||
%endif
|
%endif
|
||||||
mov r4d, dword wm
|
mov r4d, dword wm
|
||||||
%ifidn %2, highbd
|
%ifidn %2, highbd
|
||||||
|
@ -554,21 +554,21 @@ filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
|
|||||||
#define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3
|
#define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3
|
||||||
// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
|
FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2);
|
||||||
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
|
FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , avx2);
|
||||||
|
|
||||||
// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
FUN_CONV_2D(, avx2);
|
FUN_CONV_2D(, avx2);
|
||||||
#endif // HAVE_AVX2 && HAVE_SSSE3
|
#endif // HAVE_AVX2 && HAVE_SSSE3
|
||||||
|
@ -306,29 +306,28 @@ filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3;
|
|||||||
|
|
||||||
// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4,
|
||||||
// int w, int h);
|
// int y_step_q4, int w, int h);
|
||||||
// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4,
|
||||||
// int w, int h);
|
// int y_step_q4, int w, int h);
|
||||||
FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
|
FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , ssse3);
|
||||||
FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
|
FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , ssse3);
|
||||||
FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
|
FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, ssse3);
|
||||||
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
|
FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, ssse3);
|
||||||
ssse3);
|
|
||||||
|
|
||||||
#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
|
#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
|
||||||
out2, out3, out4, out5, out6, out7) \
|
out2, out3, out4, out5, out6, out7) \
|
||||||
@ -813,9 +812,9 @@ static void scaledconvolve_vert_w16(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
|
static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *const x_filters, int x0_q4,
|
const InterpKernel *const filter, int x0_q4,
|
||||||
int x_step_q4, const InterpKernel *const y_filters,
|
int x_step_q4, int y0_q4, int y_step_q4, int w,
|
||||||
int y0_q4, int y_step_q4, int w, int h) {
|
int h) {
|
||||||
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
|
||||||
// 2d filtering proceeds in 2 steps:
|
// 2d filtering proceeds in 2 steps:
|
||||||
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
// (1) Interpolate horizontally into an intermediate buffer, temp.
|
||||||
@ -840,49 +839,43 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
|
|
||||||
if (w >= 8) {
|
if (w >= 8) {
|
||||||
scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),
|
scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),
|
||||||
src_stride, temp, 64, x_filters, x0_q4, x_step_q4,
|
src_stride, temp, 64, filter, x0_q4, x_step_q4, w,
|
||||||
w, intermediate_height);
|
intermediate_height);
|
||||||
} else {
|
} else {
|
||||||
scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1),
|
scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1),
|
||||||
src_stride, temp, 64, x_filters, x0_q4, x_step_q4,
|
src_stride, temp, 64, filter, x0_q4, x_step_q4, w,
|
||||||
w, intermediate_height);
|
intermediate_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (w >= 16) {
|
if (w >= 16) {
|
||||||
scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
|
scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
|
||||||
dst_stride, y_filters, y0_q4, y_step_q4, w, h);
|
dst_stride, filter, y0_q4, y_step_q4, w, h);
|
||||||
} else if (w == 8) {
|
} else if (w == 8) {
|
||||||
scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
|
scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
|
||||||
dst_stride, y_filters, y0_q4, y_step_q4, w, h);
|
dst_stride, filter, y0_q4, y_step_q4, w, h);
|
||||||
} else {
|
} else {
|
||||||
scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
|
scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
|
||||||
dst_stride, y_filters, y0_q4, y_step_q4, w, h);
|
dst_stride, filter, y0_q4, y_step_q4, w, h);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x,
|
ptrdiff_t dst_stride, const InterpKernel *filter,
|
||||||
int x_step_q4, const int16_t *filter_y, int y_step_q4,
|
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
|
||||||
int w, int h) {
|
int w, int h) {
|
||||||
const InterpKernel *const filters_x = get_filter_base(filter_x);
|
scaledconvolve2d(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
|
||||||
const int x0_q4 = get_filter_offset(filter_x, filters_x);
|
y0_q4, y_step_q4, w, h);
|
||||||
|
|
||||||
const InterpKernel *const filters_y = get_filter_base(filter_y);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, filters_y);
|
|
||||||
|
|
||||||
scaledconvolve2d(src, src_stride, dst, dst_stride, filters_x, x0_q4,
|
|
||||||
x_step_q4, filters_y, y0_q4, y_step_q4, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
|
||||||
// uint8_t *dst, ptrdiff_t dst_stride,
|
// uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
// const int16_t *filter_x, int x_step_q4,
|
// const InterpKernel *filter, int x0_q4,
|
||||||
// const int16_t *filter_y, int y_step_q4,
|
// int32_t x_step_q4, int y0_q4, int y_step_q4,
|
||||||
// int w, int h);
|
// int w, int h);
|
||||||
FUN_CONV_2D(, ssse3);
|
FUN_CONV_2D(, ssse3);
|
||||||
FUN_CONV_2D(avg_, ssse3);
|
FUN_CONV_2D(avg_, ssse3);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user