Merge "Update highbd convolve functions arguments to use uint16_t src/dst"
This commit is contained in:
commit
54c4e0f7a5
@ -917,13 +917,14 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
|
|||||||
using std::tr1::make_tuple;
|
using std::tr1::make_tuple;
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
#define WRAP(func, bd) \
|
#define WRAP(func, bd) \
|
||||||
void wrap_##func##_##bd( \
|
void wrap_##func##_##bd( \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \
|
ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \
|
||||||
const int16_t *filter_y, int filter_y_stride, int w, int h) { \
|
const int16_t *filter_y, int filter_y_stride, int w, int h) { \
|
||||||
vpx_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \
|
vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride, \
|
||||||
filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
|
reinterpret_cast<uint16_t *>(dst), dst_stride, filter_x, \
|
||||||
|
filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_SSE2 && ARCH_X86_64
|
#if HAVE_SSE2 && ARCH_X86_64
|
||||||
|
@ -37,9 +37,8 @@ static INLINE void highbd_inter_predictor(
|
|||||||
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
|
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
|
||||||
int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
|
int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
|
||||||
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
|
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src)), src_stride,
|
CONVERT_TO_SHORTPTR(src), src_stride, CONVERT_TO_SHORTPTR(dst),
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)), dst_stride, kernel[subpel_x],
|
dst_stride, kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
|
||||||
xs, kernel[subpel_y], ys, w, h, bd);
|
|
||||||
}
|
}
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
@ -2418,11 +2418,11 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
|
|||||||
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
|
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
|
||||||
|
|
||||||
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
vpx_highbd_convolve8(
|
vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src_ptr)), src_stride,
|
CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst_ptr)), dst_stride,
|
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
|
||||||
kernel[x_q4 & 0xf], 16 * src_w / dst_w, kernel[y_q4 & 0xf],
|
kernel[y_q4 & 0xf], 16 * src_h / dst_h,
|
||||||
16 * src_h / dst_h, 16 / factor, 16 / factor, bd);
|
16 / factor, 16 / factor, bd);
|
||||||
} else {
|
} else {
|
||||||
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
|
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||||
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
|
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
|
||||||
|
@ -2079,10 +2079,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (cm->use_highbitdepth)
|
if (cm->use_highbitdepth)
|
||||||
vpx_highbd_convolve_copy(
|
vpx_highbd_convolve_copy(
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
|
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
|
||||||
best_pred->stride,
|
CONVERT_TO_SHORTPTR(this_mode_pred->data), this_mode_pred->stride,
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(this_mode_pred->data)),
|
NULL, 0, NULL, 0, bw, bh, xd->bd);
|
||||||
this_mode_pred->stride, NULL, 0, NULL, 0, bw, bh, xd->bd);
|
|
||||||
else
|
else
|
||||||
vpx_convolve_copy(best_pred->data, best_pred->stride,
|
vpx_convolve_copy(best_pred->data, best_pred->stride,
|
||||||
this_mode_pred->data, this_mode_pred->stride, NULL,
|
this_mode_pred->data, this_mode_pred->stride, NULL,
|
||||||
@ -2190,10 +2189,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (cm->use_highbitdepth)
|
if (cm->use_highbitdepth)
|
||||||
vpx_highbd_convolve_copy(
|
vpx_highbd_convolve_copy(
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
|
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
|
||||||
best_pred->stride,
|
CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, NULL, 0,
|
||||||
CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(pd->dst.buf)), pd->dst.stride,
|
bw, bh, xd->bd);
|
||||||
NULL, 0, NULL, 0, bw, bh, xd->bd);
|
|
||||||
else
|
else
|
||||||
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
|
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
|
||||||
pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
|
pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
|
||||||
|
@ -599,9 +599,8 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
|
|||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
vpx_highbd_convolve_copy(CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)),
|
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
|
||||||
dst_stride, recon, 32, NULL, 0, NULL, 0, bs,
|
32, NULL, 0, NULL, 0, bs, bs, xd->bd);
|
||||||
bs, xd->bd);
|
|
||||||
recon = CONVERT_TO_BYTEPTR(recon16);
|
recon = CONVERT_TO_BYTEPTR(recon16);
|
||||||
if (xd->lossless) {
|
if (xd->lossless) {
|
||||||
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
|
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
|
||||||
|
@ -135,18 +135,16 @@ static INLINE uint16x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1,
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_horiz_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_horiz_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, // unused
|
const int16_t *filter_y, // unused
|
||||||
int y_step_q4, // unused
|
int y_step_q4, // unused
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (x_step_q4 != 16) {
|
if (x_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_horiz_c(src8, src_stride, dst8, dst_stride, filter_x,
|
vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
||||||
} else {
|
} else {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
const int16x8_t filters = vld1q_s16(filter_x);
|
const int16x8_t filters = vld1q_s16(filter_x);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
uint16x8_t t0, t1, t2, t3;
|
uint16x8_t t0, t1, t2, t3;
|
||||||
@ -336,20 +334,17 @@ void vpx_highbd_convolve8_horiz_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_horiz_neon(const uint8_t *src8,
|
void vpx_highbd_convolve8_avg_horiz_neon(const uint16_t *src,
|
||||||
ptrdiff_t src_stride, uint8_t *dst8,
|
ptrdiff_t src_stride, uint16_t *dst,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, // unused
|
const int16_t *filter_y, // unused
|
||||||
int y_step_q4, // unused
|
int y_step_q4, // unused
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (x_step_q4 != 16) {
|
if (x_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_avg_horiz_c(src8, src_stride, dst8, dst_stride,
|
vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4,
|
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
||||||
w, h, bd);
|
|
||||||
} else {
|
} else {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
const int16x8_t filters = vld1q_s16(filter_x);
|
const int16x8_t filters = vld1q_s16(filter_x);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
uint16x8_t t0, t1, t2, t3;
|
uint16x8_t t0, t1, t2, t3;
|
||||||
@ -569,18 +564,16 @@ void vpx_highbd_convolve8_avg_horiz_neon(const uint8_t *src8,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_vert_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_vert_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, // unused
|
const int16_t *filter_x, // unused
|
||||||
int x_step_q4, // unused
|
int x_step_q4, // unused
|
||||||
const int16_t *filter_y, int y_step_q4,
|
const int16_t *filter_y, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (y_step_q4 != 16) {
|
if (y_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_vert_c(src8, src_stride, dst8, dst_stride, filter_x,
|
vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
||||||
} else {
|
} else {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
const int16x8_t filters = vld1q_s16(filter_y);
|
const int16x8_t filters = vld1q_s16(filter_y);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
|
|
||||||
@ -736,20 +729,17 @@ void vpx_highbd_convolve8_vert_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_vert_neon(const uint8_t *src8,
|
void vpx_highbd_convolve8_avg_vert_neon(const uint16_t *src,
|
||||||
ptrdiff_t src_stride, uint8_t *dst8,
|
ptrdiff_t src_stride, uint16_t *dst,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, // unused
|
const int16_t *filter_x, // unused
|
||||||
int x_step_q4, // unused
|
int x_step_q4, // unused
|
||||||
const int16_t *filter_y, int y_step_q4,
|
const int16_t *filter_y, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
if (y_step_q4 != 16) {
|
if (y_step_q4 != 16) {
|
||||||
vpx_highbd_convolve8_avg_vert_c(src8, src_stride, dst8, dst_stride,
|
vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
||||||
h, bd);
|
|
||||||
} else {
|
} else {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
const int16x8_t filters = vld1q_s16(filter_y);
|
const int16x8_t filters = vld1q_s16(filter_y);
|
||||||
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
|
||||||
|
|
||||||
|
@ -13,14 +13,11 @@
|
|||||||
#include "./vpx_dsp_rtcd.h"
|
#include "./vpx_dsp_rtcd.h"
|
||||||
#include "vpx/vpx_integer.h"
|
#include "vpx/vpx_integer.h"
|
||||||
|
|
||||||
void vpx_highbd_convolve_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const int16_t *filter_x, int filter_x_stride,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
const int16_t *filter_y, int filter_y_stride,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter_x;
|
||||||
(void)filter_x_stride;
|
(void)filter_x_stride;
|
||||||
(void)filter_y;
|
(void)filter_y;
|
||||||
|
@ -13,14 +13,11 @@
|
|||||||
#include "./vpx_dsp_rtcd.h"
|
#include "./vpx_dsp_rtcd.h"
|
||||||
#include "vpx/vpx_integer.h"
|
#include "vpx/vpx_integer.h"
|
||||||
|
|
||||||
void vpx_highbd_convolve_copy_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const int16_t *filter_x, int filter_x_stride,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
const int16_t *filter_y, int filter_y_stride,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter_x;
|
||||||
(void)filter_x_stride;
|
(void)filter_x_stride;
|
||||||
(void)filter_y;
|
(void)filter_y;
|
||||||
|
@ -13,12 +13,11 @@
|
|||||||
#include "vpx_dsp/vpx_filter.h"
|
#include "vpx_dsp/vpx_filter.h"
|
||||||
#include "vpx_ports/mem.h"
|
#include "vpx_ports/mem.h"
|
||||||
|
|
||||||
void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
const int16_t *filter_y, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
|
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
|
||||||
// + 1 to make it divisible by 4
|
// + 1 to make it divisible by 4
|
||||||
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
|
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
|
||||||
@ -29,22 +28,20 @@ void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
* height and filter a multiple of 4 lines. Since this goes in to the temp
|
* height and filter a multiple of 4 lines. Since this goes in to the temp
|
||||||
* buffer which has lots of extra room and is subsequently discarded this is
|
* buffer which has lots of extra room and is subsequently discarded this is
|
||||||
* safe if somewhat less than ideal. */
|
* safe if somewhat less than ideal. */
|
||||||
vpx_highbd_convolve8_horiz_neon(
|
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
|
||||||
CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
|
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
||||||
w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
|
intermediate_height, bd);
|
||||||
|
|
||||||
/* Step into the temp buffer 3 lines to get the actual frame data */
|
/* Step into the temp buffer 3 lines to get the actual frame data */
|
||||||
vpx_highbd_convolve8_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
|
vpx_highbd_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
|
||||||
dst_stride, filter_x, x_step_q4, filter_y,
|
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
||||||
y_step_q4, w, h, bd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
const int16_t *filter_y, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
|
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
|
||||||
// + 1 to make it divisible by 4
|
// + 1 to make it divisible by 4
|
||||||
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
|
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
|
||||||
@ -54,10 +51,9 @@ void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
/* This implementation has the same issues as above. In addition, we only want
|
/* This implementation has the same issues as above. In addition, we only want
|
||||||
* to average the values after both passes.
|
* to average the values after both passes.
|
||||||
*/
|
*/
|
||||||
vpx_highbd_convolve8_horiz_neon(
|
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
|
||||||
CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
|
filter_x, x_step_q4, filter_y, y_step_q4, w,
|
||||||
w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
|
intermediate_height, bd);
|
||||||
vpx_highbd_convolve8_avg_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
|
vpx_highbd_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
|
||||||
dst_stride, filter_x, x_step_q4, filter_y,
|
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
||||||
y_step_q4, w, h, bd);
|
|
||||||
}
|
}
|
||||||
|
@ -319,13 +319,11 @@ void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
|
static void highbd_convolve_horiz(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *x_filters, int x0_q4,
|
const InterpKernel *x_filters, int x0_q4,
|
||||||
int x_step_q4, int w, int h, int bd) {
|
int x_step_q4, int w, int h, int bd) {
|
||||||
int x, y;
|
int x, y;
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
src -= SUBPEL_TAPS / 2 - 1;
|
src -= SUBPEL_TAPS / 2 - 1;
|
||||||
|
|
||||||
for (y = 0; y < h; ++y) {
|
for (y = 0; y < h; ++y) {
|
||||||
@ -343,13 +341,11 @@ static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
|
static void highbd_convolve_avg_horiz(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *x_filters, int x0_q4,
|
const InterpKernel *x_filters, int x0_q4,
|
||||||
int x_step_q4, int w, int h, int bd) {
|
int x_step_q4, int w, int h, int bd) {
|
||||||
int x, y;
|
int x, y;
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
src -= SUBPEL_TAPS / 2 - 1;
|
src -= SUBPEL_TAPS / 2 - 1;
|
||||||
|
|
||||||
for (y = 0; y < h; ++y) {
|
for (y = 0; y < h; ++y) {
|
||||||
@ -369,13 +365,11 @@ static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
|
static void highbd_convolve_vert(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *y_filters, int y0_q4,
|
const InterpKernel *y_filters, int y0_q4,
|
||||||
int y_step_q4, int w, int h, int bd) {
|
int y_step_q4, int w, int h, int bd) {
|
||||||
int x, y;
|
int x, y;
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
|
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
|
||||||
|
|
||||||
for (x = 0; x < w; ++x) {
|
for (x = 0; x < w; ++x) {
|
||||||
@ -395,13 +389,11 @@ static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
|
static void highbd_convolve_avg_vert(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *y_filters, int y0_q4,
|
const InterpKernel *y_filters, int y0_q4,
|
||||||
int y_step_q4, int w, int h, int bd) {
|
int y_step_q4, int w, int h, int bd) {
|
||||||
int x, y;
|
int x, y;
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
|
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
|
||||||
|
|
||||||
for (x = 0; x < w; ++x) {
|
for (x = 0; x < w; ++x) {
|
||||||
@ -423,8 +415,8 @@ static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
|
static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const InterpKernel *const x_filters, int x0_q4,
|
const InterpKernel *const x_filters, int x0_q4,
|
||||||
int x_step_q4, const InterpKernel *const y_filters,
|
int x_step_q4, const InterpKernel *const y_filters,
|
||||||
int y0_q4, int y_step_q4, int w, int h, int bd) {
|
int y0_q4, int y_step_q4, int w, int h, int bd) {
|
||||||
@ -449,16 +441,15 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
assert(y_step_q4 <= 32);
|
assert(y_step_q4 <= 32);
|
||||||
assert(x_step_q4 <= 32);
|
assert(x_step_q4 <= 32);
|
||||||
|
|
||||||
highbd_convolve_horiz(CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) -
|
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
|
||||||
src_stride * (SUBPEL_TAPS / 2 - 1)),
|
temp, 64, x_filters, x0_q4, x_step_q4, w,
|
||||||
src_stride, CAST_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
|
intermediate_height, bd);
|
||||||
x_step_q4, w, intermediate_height, bd);
|
highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
|
||||||
highbd_convolve_vert(CAST_TO_BYTEPTR(temp + 64 * (SUBPEL_TAPS / 2 - 1)), 64,
|
y_filters, y0_q4, y_step_q4, w, h, bd);
|
||||||
dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
const int16_t *filter_y, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
@ -472,8 +463,8 @@ void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
x_step_q4, w, h, bd);
|
x_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
const int16_t *filter_y, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
@ -487,8 +478,8 @@ void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
x_step_q4, w, h, bd);
|
x_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
const int16_t *filter_y, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
@ -502,8 +493,8 @@ void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
y_step_q4, w, h, bd);
|
y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
const int16_t *filter_y, int y_step_q4,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
@ -517,8 +508,8 @@ void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
y_step_q4, w, h, bd);
|
y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
const int16_t *filter_y, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
@ -531,8 +522,8 @@ void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
filters_y, y0_q4, y_step_q4, w, h, bd);
|
filters_y, y0_q4, y_step_q4, w, h, bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4, int w,
|
const int16_t *filter_y, int y_step_q4, int w,
|
||||||
int h, int bd) {
|
int h, int bd) {
|
||||||
@ -541,20 +532,18 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
assert(w <= 64);
|
assert(w <= 64);
|
||||||
assert(h <= 64);
|
assert(h <= 64);
|
||||||
|
|
||||||
vpx_highbd_convolve8_c(src, src_stride, CAST_TO_BYTEPTR(temp), 64, filter_x,
|
vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4,
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd);
|
filter_y, y_step_q4, w, h, bd);
|
||||||
vpx_highbd_convolve_avg_c(CAST_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL, 0,
|
vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h,
|
||||||
NULL, 0, w, h, bd);
|
bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const int16_t *filter_x, int filter_x_stride,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
const int16_t *filter_y, int filter_y_stride,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
int r;
|
int r;
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter_x;
|
||||||
(void)filter_x_stride;
|
(void)filter_x_stride;
|
||||||
@ -569,14 +558,12 @@ void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const int16_t *filter_x, int filter_x_stride,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
const int16_t *filter_y, int filter_y_stride,
|
||||||
int w, int h, int bd) {
|
int w, int h, int bd) {
|
||||||
int x, y;
|
int x, y;
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
|
|
||||||
(void)filter_x;
|
(void)filter_x;
|
||||||
(void)filter_x_stride;
|
(void)filter_x_stride;
|
||||||
|
@ -24,8 +24,8 @@ typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
|||||||
int h);
|
int h);
|
||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
|
typedef void (*highbd_convolve_fn_t)(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int x_step_q4,
|
const int16_t *filter_x, int x_step_q4,
|
||||||
const int16_t *filter_y, int y_step_q4,
|
const int16_t *filter_y, int y_step_q4,
|
||||||
int w, int h, int bd);
|
int w, int h, int bd);
|
||||||
|
@ -372,28 +372,28 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||||||
#
|
#
|
||||||
# Sub Pixel Filters
|
# Sub Pixel Filters
|
||||||
#
|
#
|
||||||
add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
|
specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
|
specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8 neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8 neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_horiz neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_horiz neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_vert neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_vert neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_avg neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_avg neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_avg_horiz neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_avg_horiz neon/, "$sse2_x86_64";
|
||||||
|
|
||||||
add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
|
||||||
specialize qw/vpx_highbd_convolve8_avg_vert neon/, "$sse2_x86_64";
|
specialize qw/vpx_highbd_convolve8_avg_vert neon/, "$sse2_x86_64";
|
||||||
} # CONFIG_VP9_HIGHBITDEPTH
|
} # CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
@ -103,12 +103,10 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
|
|||||||
|
|
||||||
#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
|
#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
|
||||||
void vpx_highbd_convolve8_##name##_##opt( \
|
void vpx_highbd_convolve8_##name##_##opt( \
|
||||||
const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, \
|
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
||||||
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
|
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
|
||||||
if (step_q4 == 16 && filter[3] != 128) { \
|
if (step_q4 == 16 && filter[3] != 128) { \
|
||||||
uint16_t *src = CAST_TO_SHORTPTR(src8); \
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8); \
|
|
||||||
if (filter[0] | filter[1] | filter[2]) { \
|
if (filter[0] | filter[1] | filter[2]) { \
|
||||||
while (w >= 16) { \
|
while (w >= 16) { \
|
||||||
vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
|
vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
|
||||||
@ -156,43 +154,42 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
|
|||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
if (w) { \
|
if (w) { \
|
||||||
vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
|
vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
|
||||||
filter_x, x_step_q4, filter_y, \
|
filter_x, x_step_q4, filter_y, \
|
||||||
y_step_q4, w, h, bd); \
|
y_step_q4, w, h, bd); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HIGH_FUN_CONV_2D(avg, opt) \
|
#define HIGH_FUN_CONV_2D(avg, opt) \
|
||||||
void vpx_highbd_convolve8_##avg##opt( \
|
void vpx_highbd_convolve8_##avg##opt( \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
|
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
|
||||||
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
|
||||||
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
|
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
|
||||||
assert(w <= 64); \
|
assert(w <= 64); \
|
||||||
assert(h <= 64); \
|
assert(h <= 64); \
|
||||||
if (x_step_q4 == 16 && y_step_q4 == 16) { \
|
if (x_step_q4 == 16 && y_step_q4 == 16) { \
|
||||||
if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
|
if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
|
||||||
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
|
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
|
||||||
vpx_highbd_convolve8_horiz_##opt( \
|
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
|
||||||
CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) - 3 * src_stride), \
|
fdata2, 64, filter_x, x_step_q4, \
|
||||||
src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
|
filter_y, y_step_q4, w, h + 7, bd); \
|
||||||
filter_y, y_step_q4, w, h + 7, bd); \
|
vpx_highbd_convolve8_##avg##vert_##opt( \
|
||||||
vpx_highbd_convolve8_##avg##vert_##opt( \
|
fdata2 + 192, 64, dst, dst_stride, filter_x, x_step_q4, filter_y, \
|
||||||
CAST_TO_BYTEPTR(fdata2 + 192), 64, dst, dst_stride, filter_x, \
|
y_step_q4, w, h, bd); \
|
||||||
x_step_q4, filter_y, y_step_q4, w, h, bd); \
|
} else { \
|
||||||
} else { \
|
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
|
||||||
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
|
vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
|
||||||
vpx_highbd_convolve8_horiz_##opt( \
|
filter_x, x_step_q4, filter_y, \
|
||||||
src, src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
|
y_step_q4, w, h + 1, bd); \
|
||||||
filter_y, y_step_q4, w, h + 1, bd); \
|
vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
|
||||||
vpx_highbd_convolve8_##avg##vert_##opt( \
|
filter_x, x_step_q4, filter_y, \
|
||||||
CAST_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x, x_step_q4, \
|
y_step_q4, w, h, bd); \
|
||||||
filter_y, y_step_q4, w, h, bd); \
|
} \
|
||||||
} \
|
} else { \
|
||||||
} else { \
|
vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
|
||||||
vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
|
filter_x, x_step_q4, filter_y, y_step_q4, \
|
||||||
filter_x, x_step_q4, filter_y, y_step_q4, \
|
w, h, bd); \
|
||||||
w, h, bd); \
|
} \
|
||||||
} \
|
|
||||||
}
|
}
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
@ -16,13 +16,11 @@
|
|||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
// Copy and average
|
// Copy and average
|
||||||
|
|
||||||
void vpx_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const int16_t *filter_x, int filter_x_stride,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
const int16_t *filter_y, int filter_y_stride,
|
||||||
int width, int h, int bd) {
|
int width, int h, int bd) {
|
||||||
const uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
(void)filter_x;
|
(void)filter_x;
|
||||||
(void)filter_y;
|
(void)filter_y;
|
||||||
(void)filter_x_stride;
|
(void)filter_x_stride;
|
||||||
@ -99,13 +97,11 @@ void vpx_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vpx_highbd_convolve_avg_avx2(const uint8_t *src8, ptrdiff_t src_stride,
|
void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
|
||||||
uint8_t *dst8, ptrdiff_t dst_stride,
|
uint16_t *dst, ptrdiff_t dst_stride,
|
||||||
const int16_t *filter_x, int filter_x_stride,
|
const int16_t *filter_x, int filter_x_stride,
|
||||||
const int16_t *filter_y, int filter_y_stride,
|
const int16_t *filter_y, int filter_y_stride,
|
||||||
int width, int h, int bd) {
|
int width, int h, int bd) {
|
||||||
uint16_t *src = CAST_TO_SHORTPTR(src8);
|
|
||||||
uint16_t *dst = CAST_TO_SHORTPTR(dst8);
|
|
||||||
(void)filter_x;
|
(void)filter_x;
|
||||||
(void)filter_y;
|
(void)filter_y;
|
||||||
(void)filter_x_stride;
|
(void)filter_x_stride;
|
||||||
|
Loading…
Reference in New Issue
Block a user