vp9: add support for resolution changes in inter frames.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at> (cherry picked from commit e8b4f6d6befc5062db74916ea8a4d830e83022a8) Signed-off-by: Andreas Cadhalpun <Andreas.Cadhalpun@googlemail.com>
This commit is contained in:
parent
48d388b033
commit
cf4b0fb4d6
292
libavcodec/vp9.c
292
libavcodec/vp9.c
@ -240,7 +240,7 @@ typedef struct VP9Context {
|
|||||||
// whole-frame cache
|
// whole-frame cache
|
||||||
uint8_t *intra_pred_data[3];
|
uint8_t *intra_pred_data[3];
|
||||||
struct VP9Filter *lflvl;
|
struct VP9Filter *lflvl;
|
||||||
DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
|
DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144];
|
||||||
|
|
||||||
// block reconstruction intermediates
|
// block reconstruction intermediates
|
||||||
int block_alloc_using_2pass;
|
int block_alloc_using_2pass;
|
||||||
@ -249,6 +249,8 @@ typedef struct VP9Context {
|
|||||||
struct { int x, y; } min_mv, max_mv;
|
struct { int x, y; } min_mv, max_mv;
|
||||||
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
|
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
|
||||||
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
|
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
|
||||||
|
uint16_t mvscale[3][2];
|
||||||
|
uint8_t mvstep[3][2];
|
||||||
} VP9Context;
|
} VP9Context;
|
||||||
|
|
||||||
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
|
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
|
||||||
@ -583,6 +585,26 @@ static int decode_frame_header(AVCodecContext *ctx,
|
|||||||
s->varcompref[1] = 2;
|
s->varcompref[1] = 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < 3; i++) {
|
||||||
|
AVFrame *ref = s->refs[s->refidx[i]].f;
|
||||||
|
int refw = ref->width, refh = ref->height;
|
||||||
|
|
||||||
|
if (refw == w && refh == h) {
|
||||||
|
s->mvscale[i][0] = s->mvscale[i][1] = 0;
|
||||||
|
} else {
|
||||||
|
if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
|
||||||
|
av_log(ctx, AV_LOG_ERROR,
|
||||||
|
"Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
|
||||||
|
refw, refh, w, h);
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
|
}
|
||||||
|
s->mvscale[i][0] = (refw << 14) / w;
|
||||||
|
s->mvscale[i][1] = (refh << 14) / h;
|
||||||
|
s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
|
||||||
|
s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
|
s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
|
||||||
@ -2537,7 +2559,113 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
|
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
|
||||||
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
|
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||||
|
ThreadFrame *ref_frame,
|
||||||
|
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
|
||||||
|
int bw, int bh, int w, int h,
|
||||||
|
const uint16_t *scale, const uint8_t *step)
|
||||||
|
{
|
||||||
|
#define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14)
|
||||||
|
// BUG libvpx seems to scale the two components separately. This introduces
|
||||||
|
// rounding errors but we have to reproduce them to be exactly compatible
|
||||||
|
// with the output from libvpx...
|
||||||
|
int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
|
||||||
|
int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
|
||||||
|
int refbw_m1, refbh_m1;
|
||||||
|
int th;
|
||||||
|
|
||||||
|
y = my >> 4;
|
||||||
|
x = mx >> 4;
|
||||||
|
ref += y * ref_stride + x;
|
||||||
|
mx &= 15;
|
||||||
|
my &= 15;
|
||||||
|
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
|
||||||
|
refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
|
||||||
|
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
|
||||||
|
// we use +7 because the last 7 pixels of each sbrow can be changed in
|
||||||
|
// the longest loopfilter of the next sbrow
|
||||||
|
th = (y + refbh_m1 + 4 + 7) >> 6;
|
||||||
|
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
|
||||||
|
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
|
||||||
|
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
|
||||||
|
ref - 3 * ref_stride - 3,
|
||||||
|
144, ref_stride,
|
||||||
|
refbw_m1 + 8, refbh_m1 + 8,
|
||||||
|
x - 3, y - 3, w, h);
|
||||||
|
ref = s->edge_emu_buffer + 3 * 144 + 3;
|
||||||
|
ref_stride = 144;
|
||||||
|
}
|
||||||
|
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
|
||||||
|
uint8_t *dst_u, uint8_t *dst_v,
|
||||||
|
ptrdiff_t dst_stride,
|
||||||
|
const uint8_t *ref_u, ptrdiff_t src_stride_u,
|
||||||
|
const uint8_t *ref_v, ptrdiff_t src_stride_v,
|
||||||
|
ThreadFrame *ref_frame,
|
||||||
|
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
|
||||||
|
int bw, int bh, int w, int h,
|
||||||
|
const uint16_t *scale, const uint8_t *step)
|
||||||
|
{
|
||||||
|
// BUG https://code.google.com/p/webm/issues/detail?id=820
|
||||||
|
int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
|
||||||
|
int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
|
||||||
|
#undef scale_mv
|
||||||
|
int refbw_m1, refbh_m1;
|
||||||
|
int th;
|
||||||
|
|
||||||
|
y = my >> 4;
|
||||||
|
x = mx >> 4;
|
||||||
|
ref_u += y * src_stride_u + x;
|
||||||
|
ref_v += y * src_stride_v + x;
|
||||||
|
mx &= 15;
|
||||||
|
my &= 15;
|
||||||
|
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
|
||||||
|
refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
|
||||||
|
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
|
||||||
|
// we use +7 because the last 7 pixels of each sbrow can be changed in
|
||||||
|
// the longest loopfilter of the next sbrow
|
||||||
|
th = (y + refbh_m1 + 4 + 7) >> 5;
|
||||||
|
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
|
||||||
|
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
|
||||||
|
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
|
||||||
|
ref_u - 3 * src_stride_u - 3,
|
||||||
|
144, src_stride_u,
|
||||||
|
refbw_m1 + 8, refbh_m1 + 8,
|
||||||
|
x - 3, y - 3, w, h);
|
||||||
|
ref_u = s->edge_emu_buffer + 3 * 144 + 3;
|
||||||
|
smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
|
||||||
|
|
||||||
|
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
|
||||||
|
ref_v - 3 * src_stride_v - 3,
|
||||||
|
144, src_stride_v,
|
||||||
|
refbw_m1 + 8, refbh_m1 + 8,
|
||||||
|
x - 3, y - 3, w, h);
|
||||||
|
ref_v = s->edge_emu_buffer + 3 * 144 + 3;
|
||||||
|
smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
|
||||||
|
} else {
|
||||||
|
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
|
||||||
|
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Build inter_pred_scaled() from the shared template.
 *
 * The template's second macro argument starts with the token "mc"; pasting
 * the context argument "s" in front of it selects the s->dsp.smc function
 * table. The trailing reference index picks the per-reference scale and
 * step tables; "b" is the block pointer local to the template function.
 */
#define FN(x) x##_scaled
#define mc_luma_dir(s, mc, dst, dstride, src, sstride, tframe, y, x, pmv,    \
                    blkw, blkh, fw, fh, idx)                                 \
    mc_luma_scaled(s, s->dsp.s##mc, dst, dstride, src, sstride, tframe,      \
                   y, x, pmv, blkw, blkh, fw, fh,                            \
                   s->mvscale[b->ref[idx]], s->mvstep[b->ref[idx]])
#define mc_chroma_dir(s, mc, dstu, dstv, dstride, srcu, srcu_stride,         \
                      srcv, srcv_stride, tframe, y, x, pmv,                  \
                      blkw, blkh, fw, fh, idx)                               \
    mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dstride,                   \
                     srcu, srcu_stride, srcv, srcv_stride, tframe,           \
                     y, x, pmv, blkw, blkh, fw, fh,                          \
                     s->mvscale[b->ref[idx]], s->mvstep[b->ref[idx]])
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
|
||||||
|
|
||||||
|
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
|
||||||
uint8_t *dst, ptrdiff_t dst_stride,
|
uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const uint8_t *ref, ptrdiff_t ref_stride,
|
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||||
ThreadFrame *ref_frame,
|
ThreadFrame *ref_frame,
|
||||||
@ -2569,7 +2697,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
|
|||||||
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
|
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
|
static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
|
||||||
uint8_t *dst_u, uint8_t *dst_v,
|
uint8_t *dst_u, uint8_t *dst_v,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const uint8_t *ref_u, ptrdiff_t src_stride_u,
|
const uint8_t *ref_u, ptrdiff_t src_stride_u,
|
||||||
@ -2614,156 +2742,32 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build the plain inter_pred() from the shared template.
 *
 * The template's second macro argument starts with the token "mc", so it
 * expands to an entry of the s->dsp.mc function table. The trailing
 * reference index is accepted for signature parity with the scaled
 * variant and is not used here.
 */
#define FN(x) x
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                     mv, bw, bh, w, h)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, bw, bh, w, h, i) \
    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, bw, bh, w, h)
#include "vp9_mc_template.c"
/* undefine the macros under the names they were defined with, so that they
 * do not stay visible in the remainder of the file */
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
|
||||||
|
|
||||||
static void inter_recon(AVCodecContext *ctx)
|
static void inter_recon(AVCodecContext *ctx)
|
||||||
{
|
{
|
||||||
static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
|
|
||||||
{ 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
|
|
||||||
{ 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
|
|
||||||
};
|
|
||||||
VP9Context *s = ctx->priv_data;
|
VP9Context *s = ctx->priv_data;
|
||||||
VP9Block *b = s->b;
|
VP9Block *b = s->b;
|
||||||
int row = s->row, col = s->col;
|
int row = s->row, col = s->col;
|
||||||
ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
|
|
||||||
AVFrame *ref1 = tref1->f, *ref2;
|
|
||||||
int w1 = ref1->width, h1 = ref1->height, w2, h2;
|
|
||||||
ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
|
|
||||||
|
|
||||||
if (b->comp) {
|
if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
|
||||||
tref2 = &s->refs[s->refidx[b->ref[1]]];
|
inter_pred_scaled(ctx);
|
||||||
ref2 = tref2->f;
|
|
||||||
w2 = ref2->width;
|
|
||||||
h2 = ref2->height;
|
|
||||||
}
|
|
||||||
|
|
||||||
// y inter pred
|
|
||||||
if (b->bs > BS_8x8) {
|
|
||||||
if (b->bs == BS_8x4) {
|
|
||||||
mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
|
|
||||||
s->dst[0] + 4 * ls_y, ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
(row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
|
|
||||||
|
|
||||||
if (b->comp) {
|
|
||||||
mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
|
|
||||||
s->dst[0] + 4 * ls_y, ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
(row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
|
|
||||||
}
|
|
||||||
} else if (b->bs == BS_4x8) {
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
|
|
||||||
|
|
||||||
if (b->comp) {
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
av_assert2(b->bs == BS_4x4);
|
inter_pred(ctx);
|
||||||
|
|
||||||
// FIXME if two horizontally adjacent blocks have the same MV,
|
|
||||||
// do a w8 instead of a w4 call
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
|
|
||||||
s->dst[0] + 4 * ls_y, ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
|
|
||||||
s->dst[0] + 4 * ls_y + 4, ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
|
|
||||||
|
|
||||||
if (b->comp) {
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
|
|
||||||
s->dst[0] + 4 * ls_y, ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
|
|
||||||
mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
|
|
||||||
s->dst[0] + 4 * ls_y + 4, ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
|
||||||
int bwl = bwlog_tab[0][b->bs];
|
|
||||||
int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
|
|
||||||
|
|
||||||
mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
|
|
||||||
ref1->data[0], ref1->linesize[0], tref1,
|
|
||||||
row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
|
|
||||||
|
|
||||||
if (b->comp)
|
|
||||||
mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
|
|
||||||
ref2->data[0], ref2->linesize[0], tref2,
|
|
||||||
row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// uv inter pred
|
|
||||||
{
|
|
||||||
int bwl = bwlog_tab[1][b->bs];
|
|
||||||
int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
|
|
||||||
VP56mv mvuv;
|
|
||||||
|
|
||||||
w1 = (w1 + 1) >> 1;
|
|
||||||
h1 = (h1 + 1) >> 1;
|
|
||||||
if (b->comp) {
|
|
||||||
w2 = (w2 + 1) >> 1;
|
|
||||||
h2 = (h2 + 1) >> 1;
|
|
||||||
}
|
|
||||||
if (b->bs > BS_8x8) {
|
|
||||||
mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
|
|
||||||
mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
|
|
||||||
} else {
|
|
||||||
mvuv = b->mv[0][0];
|
|
||||||
}
|
|
||||||
|
|
||||||
mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
|
|
||||||
s->dst[1], s->dst[2], ls_uv,
|
|
||||||
ref1->data[1], ref1->linesize[1],
|
|
||||||
ref1->data[2], ref1->linesize[2], tref1,
|
|
||||||
row << 2, col << 2, &mvuv, bw, bh, w1, h1);
|
|
||||||
|
|
||||||
if (b->comp) {
|
|
||||||
if (b->bs > BS_8x8) {
|
|
||||||
mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
|
|
||||||
mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
|
|
||||||
} else {
|
|
||||||
mvuv = b->mv[0][1];
|
|
||||||
}
|
|
||||||
mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
|
|
||||||
s->dst[1], s->dst[2], ls_uv,
|
|
||||||
ref2->data[1], ref2->linesize[1],
|
|
||||||
ref2->data[2], ref2->linesize[2], tref2,
|
|
||||||
row << 2, col << 2, &mvuv, bw, bh, w2, h2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!b->skip) {
|
if (!b->skip) {
|
||||||
/* mostly copied intra_reconn() */
|
/* mostly copied intra_recon() */
|
||||||
|
|
||||||
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
|
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
|
||||||
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
|
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
|
||||||
|
171
libavcodec/vp9_mc_template.c
Normal file
171
libavcodec/vp9_mc_template.c
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
/*
|
||||||
|
* VP9 compatible video decoder
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
|
||||||
|
* Copyright (C) 2013 Clément Bœsch <u pkh me>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Inter-predict the luma and chroma samples of the current block.
 *
 * The prediction from the first reference is produced first; for compound
 * blocks the same sequence of calls is then repeated for the second
 * reference. Luma is handled before chroma.
 *
 * This body is a template: the including file supplies FN(),
 * mc_luma_dir() and mc_chroma_dir(). Those macros depend on the local
 * names "s" and "b" and on the function-table argument starting with the
 * token "mc", so these spellings must be kept.
 */
static void FN(inter_pred)(AVCodecContext *ctx)
{
    static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    const int row = s->row, col = s->col;
    const int nb_refs = b->comp ? 2 : 1;
    const ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
    ThreadFrame *tref[2];
    AVFrame *frm[2];
    int fw[2], fh[2];
    int i;

    // gather the reference(s) actually used by this block
    for (i = 0; i < nb_refs; i++) {
        tref[i] = &s->refs[s->refidx[b->ref[i]]];
        frm[i]  = tref[i]->f;
        fw[i]   = frm[i]->width;
        fh[i]   = frm[i]->height;
    }

    // y inter pred
    if (b->bs > BS_8x8) {
        // sub-8x8 partitions: every sub-block has its own motion vector
        for (i = 0; i < nb_refs; i++) {
            const uint8_t *plane = frm[i]->data[0];
            const ptrdiff_t ref_ls = frm[i]->linesize[0];

            if (b->bs == BS_8x4) {
                mc_luma_dir(s, mc[3][b->filter][i], s->dst[0], ls_y,
                            plane, ref_ls, tref[i],
                            row << 3, col << 3, &b->mv[0][i], 8, 4, fw[i], fh[i], i);
                mc_luma_dir(s, mc[3][b->filter][i],
                            s->dst[0] + 4 * ls_y, ls_y,
                            plane, ref_ls, tref[i],
                            (row << 3) + 4, col << 3, &b->mv[2][i], 8, 4, fw[i], fh[i], i);
            } else if (b->bs == BS_4x8) {
                mc_luma_dir(s, mc[4][b->filter][i], s->dst[0], ls_y,
                            plane, ref_ls, tref[i],
                            row << 3, col << 3, &b->mv[0][i], 4, 8, fw[i], fh[i], i);
                mc_luma_dir(s, mc[4][b->filter][i], s->dst[0] + 4, ls_y,
                            plane, ref_ls, tref[i],
                            row << 3, (col << 3) + 4, &b->mv[1][i], 4, 8, fw[i], fh[i], i);
            } else {
                av_assert2(b->bs == BS_4x4);

                // FIXME if two horizontally adjacent blocks have the same MV,
                // do a w8 instead of a w4 call
                mc_luma_dir(s, mc[4][b->filter][i], s->dst[0], ls_y,
                            plane, ref_ls, tref[i],
                            row << 3, col << 3, &b->mv[0][i], 4, 4, fw[i], fh[i], i);
                mc_luma_dir(s, mc[4][b->filter][i], s->dst[0] + 4, ls_y,
                            plane, ref_ls, tref[i],
                            row << 3, (col << 3) + 4, &b->mv[1][i], 4, 4, fw[i], fh[i], i);
                mc_luma_dir(s, mc[4][b->filter][i],
                            s->dst[0] + 4 * ls_y, ls_y,
                            plane, ref_ls, tref[i],
                            (row << 3) + 4, col << 3, &b->mv[2][i], 4, 4, fw[i], fh[i], i);
                mc_luma_dir(s, mc[4][b->filter][i],
                            s->dst[0] + 4 * ls_y + 4, ls_y,
                            plane, ref_ls, tref[i],
                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][i], 4, 4, fw[i], fh[i], i);
            }
        }
    } else {
        const int bwl = bwlog_tab[0][b->bs];
        const int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;

        for (i = 0; i < nb_refs; i++)
            mc_luma_dir(s, mc[bwl][b->filter][i], s->dst[0], ls_y,
                        frm[i]->data[0], frm[i]->linesize[0], tref[i],
                        row << 3, col << 3, &b->mv[0][i], bw, bh, fw[i], fh[i], i);
    }

    // uv inter pred
    {
        const int bwl = bwlog_tab[1][b->bs];
        const int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;

        for (i = 0; i < nb_refs; i++) {
            // chroma planes have half the luma dimensions, rounded up
            const int cw = (fw[i] + 1) >> 1;
            const int ch = (fh[i] + 1) >> 1;
            VP56mv mvuv;

            if (b->bs > BS_8x8) {
                // one chroma vector: rounded average of the four luma vectors
                mvuv.x = ROUNDED_DIV(b->mv[0][i].x + b->mv[1][i].x + b->mv[2][i].x + b->mv[3][i].x, 4);
                mvuv.y = ROUNDED_DIV(b->mv[0][i].y + b->mv[1][i].y + b->mv[2][i].y + b->mv[3][i].y, 4);
            } else {
                mvuv = b->mv[0][i];
            }

            mc_chroma_dir(s, mc[bwl][b->filter][i],
                          s->dst[1], s->dst[2], ls_uv,
                          frm[i]->data[1], frm[i]->linesize[1],
                          frm[i]->data[2], frm[i]->linesize[2], tref[i],
                          row << 2, col << 2, &mvuv, bw, bh, cw, ch, i);
        }
    }
}
|
@ -1,5 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2008 Michael Niedermayer
|
* VP9 compatible video decoder
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
|
||||||
|
* Copyright (C) 2013 Clément Bœsch <u pkh me>
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
|
@ -1707,8 +1707,9 @@ copy_avg_fn(4)
|
|||||||
#undef fpel_fn
|
#undef fpel_fn
|
||||||
#undef copy_avg_fn
|
#undef copy_avg_fn
|
||||||
|
|
||||||
static const int8_t vp9_subpel_filters[3][15][8] = {
|
static const int16_t vp9_subpel_filters[3][16][8] = {
|
||||||
[FILTER_8TAP_REGULAR] = {
|
[FILTER_8TAP_REGULAR] = {
|
||||||
|
{ 0, 0, 0, 128, 0, 0, 0, 0 },
|
||||||
{ 0, 1, -5, 126, 8, -3, 1, 0 },
|
{ 0, 1, -5, 126, 8, -3, 1, 0 },
|
||||||
{ -1, 3, -10, 122, 18, -6, 2, 0 },
|
{ -1, 3, -10, 122, 18, -6, 2, 0 },
|
||||||
{ -1, 4, -13, 118, 27, -9, 3, -1 },
|
{ -1, 4, -13, 118, 27, -9, 3, -1 },
|
||||||
@ -1725,6 +1726,7 @@ static const int8_t vp9_subpel_filters[3][15][8] = {
|
|||||||
{ 0, 2, -6, 18, 122, -10, 3, -1 },
|
{ 0, 2, -6, 18, 122, -10, 3, -1 },
|
||||||
{ 0, 1, -3, 8, 126, -5, 1, 0 },
|
{ 0, 1, -3, 8, 126, -5, 1, 0 },
|
||||||
}, [FILTER_8TAP_SHARP] = {
|
}, [FILTER_8TAP_SHARP] = {
|
||||||
|
{ 0, 0, 0, 128, 0, 0, 0, 0 },
|
||||||
{ -1, 3, -7, 127, 8, -3, 1, 0 },
|
{ -1, 3, -7, 127, 8, -3, 1, 0 },
|
||||||
{ -2, 5, -13, 125, 17, -6, 3, -1 },
|
{ -2, 5, -13, 125, 17, -6, 3, -1 },
|
||||||
{ -3, 7, -17, 121, 27, -10, 5, -2 },
|
{ -3, 7, -17, 121, 27, -10, 5, -2 },
|
||||||
@ -1741,6 +1743,7 @@ static const int8_t vp9_subpel_filters[3][15][8] = {
|
|||||||
{ -1, 3, -6, 17, 125, -13, 5, -2 },
|
{ -1, 3, -6, 17, 125, -13, 5, -2 },
|
||||||
{ 0, 1, -3, 8, 127, -7, 3, -1 },
|
{ 0, 1, -3, 8, 127, -7, 3, -1 },
|
||||||
}, [FILTER_8TAP_SMOOTH] = {
|
}, [FILTER_8TAP_SMOOTH] = {
|
||||||
|
{ 0, 0, 0, 128, 0, 0, 0, 0 },
|
||||||
{ -3, -1, 32, 64, 38, 1, -3, 0 },
|
{ -3, -1, 32, 64, 38, 1, -3, 0 },
|
||||||
{ -2, -2, 29, 63, 41, 2, -3, 0 },
|
{ -2, -2, 29, 63, 41, 2, -3, 0 },
|
||||||
{ -2, -2, 26, 63, 43, 4, -4, 0 },
|
{ -2, -2, 26, 63, 43, 4, -4, 0 },
|
||||||
@ -1772,7 +1775,7 @@ static const int8_t vp9_subpel_filters[3][15][8] = {
|
|||||||
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
|
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const uint8_t *src, ptrdiff_t src_stride,
|
const uint8_t *src, ptrdiff_t src_stride,
|
||||||
int w, int h, ptrdiff_t ds,
|
int w, int h, ptrdiff_t ds,
|
||||||
const int8_t *filter, int avg)
|
const int16_t *filter, int avg)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
int x;
|
int x;
|
||||||
@ -1792,7 +1795,7 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
|
|||||||
#define filter_8tap_1d_fn(opn, opa, dir, ds) \
|
#define filter_8tap_1d_fn(opn, opa, dir, ds) \
|
||||||
static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, \
|
const uint8_t *src, ptrdiff_t src_stride, \
|
||||||
int w, int h, const int8_t *filter) \
|
int w, int h, const int16_t *filter) \
|
||||||
{ \
|
{ \
|
||||||
do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
|
do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
|
||||||
}
|
}
|
||||||
@ -1806,8 +1809,8 @@ filter_8tap_1d_fn(avg, 1, h, 1)
|
|||||||
|
|
||||||
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
|
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const uint8_t *src, ptrdiff_t src_stride,
|
const uint8_t *src, ptrdiff_t src_stride,
|
||||||
int w, int h, const int8_t *filterx,
|
int w, int h, const int16_t *filterx,
|
||||||
const int8_t *filtery, int avg)
|
const int16_t *filtery, int avg)
|
||||||
{
|
{
|
||||||
int tmp_h = h + 7;
|
int tmp_h = h + 7;
|
||||||
uint8_t tmp[64 * 71], *tmp_ptr = tmp;
|
uint8_t tmp[64 * 71], *tmp_ptr = tmp;
|
||||||
@ -1842,8 +1845,8 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
|
|||||||
#define filter_8tap_2d_fn(opn, opa) \
|
#define filter_8tap_2d_fn(opn, opa) \
|
||||||
static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, \
|
const uint8_t *src, ptrdiff_t src_stride, \
|
||||||
int w, int h, const int8_t *filterx, \
|
int w, int h, const int16_t *filterx, \
|
||||||
const int8_t *filtery) \
|
const int16_t *filtery) \
|
||||||
{ \
|
{ \
|
||||||
do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
|
do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
|
||||||
}
|
}
|
||||||
@ -1853,15 +1856,13 @@ filter_8tap_2d_fn(avg, 1)
|
|||||||
|
|
||||||
#undef filter_8tap_2d_fn
|
#undef filter_8tap_2d_fn
|
||||||
|
|
||||||
#undef FILTER_8TAP
|
|
||||||
|
|
||||||
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
|
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
|
||||||
static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, \
|
const uint8_t *src, ptrdiff_t src_stride, \
|
||||||
int h, int mx, int my) \
|
int h, int mx, int my) \
|
||||||
{ \
|
{ \
|
||||||
avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
|
avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
|
||||||
vp9_subpel_filters[type_idx][dir_m - 1]); \
|
vp9_subpel_filters[type_idx][dir_m]); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define filter_fn_2d(sz, type, type_idx, avg) \
|
#define filter_fn_2d(sz, type, type_idx, avg) \
|
||||||
@ -1870,8 +1871,8 @@ static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
|||||||
int h, int mx, int my) \
|
int h, int mx, int my) \
|
||||||
{ \
|
{ \
|
||||||
avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
|
avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
|
||||||
vp9_subpel_filters[type_idx][mx - 1], \
|
vp9_subpel_filters[type_idx][mx], \
|
||||||
vp9_subpel_filters[type_idx][my - 1]); \
|
vp9_subpel_filters[type_idx][my]); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FILTER_BILIN(src, x, mxy, stride) \
|
#define FILTER_BILIN(src, x, mxy, stride) \
|
||||||
@ -1957,8 +1958,6 @@ bilin_2d_fn(avg, 1)
|
|||||||
|
|
||||||
#undef bilin_2d_fn
|
#undef bilin_2d_fn
|
||||||
|
|
||||||
#undef FILTER_BILIN
|
|
||||||
|
|
||||||
#define bilinf_fn_1d(sz, dir, dir_m, avg) \
|
#define bilinf_fn_1d(sz, dir, dir_m, avg) \
|
||||||
static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||||
const uint8_t *src, ptrdiff_t src_stride, \
|
const uint8_t *src, ptrdiff_t src_stride, \
|
||||||
@ -2053,12 +2052,190 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
|
|||||||
#undef init_subpel3
|
#undef init_subpel3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Scaled 8-tap motion compensation (C implementation), done in two passes
 * through a temporary buffer whose rows are 64 bytes apart.
 *
 * dst, dst_stride: destination block and its line stride
 * src, src_stride: reference samples and their line stride
 * w, h:            size of the destination block
 * mx, my:          initial sub-sample phases; they index filters[] and are
 *                  kept in the range 0..15 by masking after every step
 * dx, dy:          per-output-sample step in 1/16 sample units (the integer
 *                  part is taken with >> 4, the fraction with & 0xf)
 * avg:             when non-zero, the filtered value is averaged with the
 *                  value already in dst (rounding up); otherwise it
 *                  overwrites dst
 * filters:         table of 8-tap kernels, one per phase
 *
 * NOTE(review): the temporary buffer holds 135 rows of 64 bytes; this
 * presumably relies on callers passing w <= 64, h <= 64 and dy <= 32 --
 * confirm against the call sites.
 */
static av_always_inline void do_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride,
                                              const uint8_t *src, ptrdiff_t src_stride,
                                              int w, int h, int mx, int my,
                                              int dx, int dy, int avg,
                                              const int16_t (*filters)[8])
{
    uint8_t tmp[64 * 135];
    uint8_t *row = tmp;
    /* source rows spanned by the vertical walk, plus 8 extra rows */
    int rows_left = (((h - 1) * dy + my) >> 4) + 8;
    int col;

    /* Pass 1: horizontal filtering of every needed source row into tmp.
     * The source is rewound by 3 rows; pass 2 starts 3 rows into tmp. */
    src -= src_stride * 3;
    do {
        int frac = mx;
        int pos  = 0;

        for (col = 0; col < w; col++) {
            row[col] = FILTER_8TAP(src, pos, filters[frac], 1);
            frac += dx;
            pos  += frac >> 4;
            frac &= 0xf;
        }

        src += src_stride;
        row += 64;
    } while (--rows_left);

    /* Pass 2: vertical filtering from tmp into dst. */
    row = tmp + 64 * 3;
    do {
        const int16_t *taps = filters[my];

        if (avg) {
            for (col = 0; col < w; col++)
                dst[col] = (dst[col] + FILTER_8TAP(row, col, taps, 64) + 1) >> 1;
        } else {
            for (col = 0; col < w; col++)
                dst[col] = FILTER_8TAP(row, col, taps, 64);
        }

        /* advance the vertical position by dy/16 rows */
        my  += dy;
        row += (my >> 4) * 64;
        my  &= 0xf;
        dst += dst_stride;
    } while (--h);
}
|
||||||
|
|
||||||
|
#define scaled_filter_8tap_fn(opn, opa) \
|
||||||
|
static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||||
|
const uint8_t *src, ptrdiff_t src_stride, \
|
||||||
|
int w, int h, int mx, int my, int dx, int dy, \
|
||||||
|
const int16_t (*filters)[8]) \
|
||||||
|
{ \
|
||||||
|
do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
|
||||||
|
opa, filters); \
|
||||||
|
}
|
||||||
|
|
||||||
|
scaled_filter_8tap_fn(put, 0)
|
||||||
|
scaled_filter_8tap_fn(avg, 1)
|
||||||
|
|
||||||
|
#undef scaled_filter_8tap_fn
|
||||||
|
|
||||||
|
#undef FILTER_8TAP
|
||||||
|
|
||||||
|
/*
 * Emits <op>_scaled_<flt>_<size>_c(): a fixed-width scaled 8-tap function.
 * It calls <op>_scaled_8tap_c() with the block width set to `size` and the
 * kernel table set to vp9_subpel_filters[flt_idx].
 */
#define scaled_filter_fn(size, flt, flt_idx, op)                                   \
static void op##_scaled_##flt##_##size##_c(uint8_t *dst, ptrdiff_t dst_stride,     \
                                           const uint8_t *src,                     \
                                           ptrdiff_t src_stride,                   \
                                           int h, int mx, int my,                  \
                                           int dx, int dy)                         \
{                                                                                  \
    op##_scaled_8tap_c(dst, dst_stride, src, src_stride,                           \
                       size, h, mx, my, dx, dy,                                    \
                       vp9_subpel_filters[flt_idx]);                               \
}
|
||||||
|
|
||||||
|
/*
 * Scaled bilinear motion compensation (C implementation), done in two
 * passes through a temporary buffer whose rows are 64 bytes apart.
 *
 * dst, dst_stride: destination block and its line stride
 * src, src_stride: reference samples and their line stride
 * w, h:            size of the destination block
 * mx, my:          initial sub-sample phases, kept in 0..15 by masking
 * dx, dy:          per-output-sample step in 1/16 sample units
 * avg:             when non-zero, the interpolated value is averaged with
 *                  the value already in dst (rounding up); otherwise it
 *                  overwrites dst
 *
 * NOTE(review): the temporary buffer holds 129 rows of 64 bytes; this
 * presumably relies on callers passing w <= 64, h <= 64 and dy <= 32 --
 * confirm against the call sites.
 */
static av_always_inline void do_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride,
                                               const uint8_t *src, ptrdiff_t src_stride,
                                               int w, int h, int mx, int my,
                                               int dx, int dy, int avg)
{
    uint8_t tmp[64 * 129];
    uint8_t *row = tmp;
    /* source rows spanned by the vertical walk, plus 2 extra rows */
    int rows_left = (((h - 1) * dy + my) >> 4) + 2;
    int col;

    /* Pass 1: horizontal interpolation of every needed source row. */
    do {
        int frac = mx;
        int pos  = 0;

        for (col = 0; col < w; col++) {
            row[col] = FILTER_BILIN(src, pos, frac, 1);
            frac += dx;
            pos  += frac >> 4;
            frac &= 0xf;
        }

        src += src_stride;
        row += 64;
    } while (--rows_left);

    /* Pass 2: vertical interpolation from tmp into dst. */
    row = tmp;
    do {
        if (avg) {
            for (col = 0; col < w; col++)
                dst[col] = (dst[col] + FILTER_BILIN(row, col, my, 64) + 1) >> 1;
        } else {
            for (col = 0; col < w; col++)
                dst[col] = FILTER_BILIN(row, col, my, 64);
        }

        /* advance the vertical position by dy/16 rows */
        my  += dy;
        row += (my >> 4) * 64;
        my  &= 0xf;
        dst += dst_stride;
    } while (--h);
}
|
||||||
|
|
||||||
|
#define scaled_bilin_fn(opn, opa) \
|
||||||
|
static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||||
|
const uint8_t *src, ptrdiff_t src_stride, \
|
||||||
|
int w, int h, int mx, int my, int dx, int dy) \
|
||||||
|
{ \
|
||||||
|
do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
|
||||||
|
}
|
||||||
|
|
||||||
|
scaled_bilin_fn(put, 0)
|
||||||
|
scaled_bilin_fn(avg, 1)
|
||||||
|
|
||||||
|
#undef scaled_bilin_fn
|
||||||
|
|
||||||
|
#undef FILTER_BILIN
|
||||||
|
|
||||||
|
/*
 * Emits <op>_scaled_bilin_<size>_c(): a fixed-width scaled bilinear
 * function that calls <op>_scaled_bilin_c() with the block width set to
 * `size`.
 */
#define scaled_bilinf_fn(size, op)                                                 \
static void op##_scaled_bilin_##size##_c(uint8_t *dst, ptrdiff_t dst_stride,       \
                                         const uint8_t *src,                       \
                                         ptrdiff_t src_stride,                     \
                                         int h, int mx, int my,                    \
                                         int dx, int dy)                           \
{                                                                                  \
    op##_scaled_bilin_c(dst, dst_stride, src, src_stride,                          \
                        size, h, mx, my, dx, dy);                                  \
}
|
||||||
|
|
||||||
|
/*
 * Emits the four scaled MC functions (regular, smooth and sharp 8-tap plus
 * bilinear) for one block width `size` and one operation `op`.
 */
#define scaled_filter_fns(size, op)                          \
scaled_filter_fn(size, regular, FILTER_8TAP_REGULAR, op)     \
scaled_filter_fn(size, smooth,  FILTER_8TAP_SMOOTH,  op)     \
scaled_filter_fn(size, sharp,   FILTER_8TAP_SHARP,   op)     \
scaled_bilinf_fn(size, op)
|
||||||
|
|
||||||
|
#define scaled_filter_fn_set(avg) \
|
||||||
|
scaled_filter_fns(64, avg) \
|
||||||
|
scaled_filter_fns(32, avg) \
|
||||||
|
scaled_filter_fns(16, avg) \
|
||||||
|
scaled_filter_fns(8, avg) \
|
||||||
|
scaled_filter_fns(4, avg)
|
||||||
|
|
||||||
|
scaled_filter_fn_set(put)
|
||||||
|
scaled_filter_fn_set(avg)
|
||||||
|
|
||||||
|
#undef scaled_filter_fns
|
||||||
|
#undef scaled_filter_fn_set
|
||||||
|
#undef scaled_filter_fn
|
||||||
|
#undef scaled_bilinf_fn
|
||||||
|
|
||||||
|
/*
 * Fills dsp->smc[][][] with the C scaled motion compensation functions.
 *
 * First index:  block width (0 = 64, 1 = 32, 2 = 16, 3 = 8, 4 = 4)
 * Second index: filter type (FILTER_8TAP_* or FILTER_BILINEAR)
 * Third index:  0 = put, 1 = avg
 */
static av_cold void vp9dsp_scaled_mc_init(VP9DSPContext *dsp)
{
/* set the four filter variants for one width and one operation */
#define assign_smc(size_idx, op_idx, sz, op)                                          \
    dsp->smc[size_idx][FILTER_8TAP_SMOOTH ][op_idx] = op##_scaled_smooth_##sz##_c;    \
    dsp->smc[size_idx][FILTER_8TAP_REGULAR][op_idx] = op##_scaled_regular_##sz##_c;   \
    dsp->smc[size_idx][FILTER_8TAP_SHARP  ][op_idx] = op##_scaled_sharp_##sz##_c;     \
    dsp->smc[size_idx][FILTER_BILINEAR    ][op_idx] = op##_scaled_bilin_##sz##_c

/* set both the put and the avg entries for one width */
#define assign_smc_pair(size_idx, sz) \
    assign_smc(size_idx, 0, sz, put); \
    assign_smc(size_idx, 1, sz, avg)

    assign_smc_pair(0, 64);
    assign_smc_pair(1, 32);
    assign_smc_pair(2, 16);
    assign_smc_pair(3, 8);
    assign_smc_pair(4, 4);

#undef assign_smc_pair
#undef assign_smc
}
|
||||||
|
|
||||||
av_cold void ff_vp9dsp_init(VP9DSPContext *dsp)
|
av_cold void ff_vp9dsp_init(VP9DSPContext *dsp)
|
||||||
{
|
{
|
||||||
vp9dsp_intrapred_init(dsp);
|
vp9dsp_intrapred_init(dsp);
|
||||||
vp9dsp_itxfm_init(dsp);
|
vp9dsp_itxfm_init(dsp);
|
||||||
vp9dsp_loopfilter_init(dsp);
|
vp9dsp_loopfilter_init(dsp);
|
||||||
vp9dsp_mc_init(dsp);
|
vp9dsp_mc_init(dsp);
|
||||||
|
vp9dsp_scaled_mc_init(dsp);
|
||||||
|
|
||||||
if (ARCH_X86) ff_vp9dsp_init_x86(dsp);
|
if (ARCH_X86) ff_vp9dsp_init_x86(dsp);
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,9 @@
|
|||||||
typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
|
typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
const uint8_t *ref, ptrdiff_t ref_stride,
|
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||||
int h, int mx, int my);
|
int h, int mx, int my);
|
||||||
|
/*
 * Function pointer type for scaled motion compensation. Takes the same
 * arguments as vp9_mc_func plus two extra ints, dx and dy.
 * NOTE(review): dx/dy are presumably the horizontal/vertical step per
 * output sample in 1/16 sample units -- confirm against the callers.
 */
typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
|
||||||
|
const uint8_t *ref, ptrdiff_t ref_stride,
|
||||||
|
int h, int mx, int my, int dx, int dy);
|
||||||
|
|
||||||
typedef struct VP9DSPContext {
|
typedef struct VP9DSPContext {
|
||||||
/*
|
/*
|
||||||
@ -109,6 +112,12 @@ typedef struct VP9DSPContext {
|
|||||||
* dst/stride are aligned by hsize
|
* dst/stride are aligned by hsize
|
||||||
*/
|
*/
|
||||||
vp9_mc_func mc[5][4][2][2][2];
|
vp9_mc_func mc[5][4][2][2][2];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* for scalable MC, first 3 dimensions identical to above, the other two
|
||||||
|
* don't exist since it changes per stepsize.
|
||||||
|
*/
|
||||||
|
vp9_scaled_mc_func smc[5][4][2];
|
||||||
} VP9DSPContext;
|
} VP9DSPContext;
|
||||||
|
|
||||||
void ff_vp9dsp_init(VP9DSPContext *dsp);
|
void ff_vp9dsp_init(VP9DSPContext *dsp);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user