dixie: support for simple lf, bilinear subpix

Initial support for simple loopfilter and bilinear subpixel motion
interpolation. This adds support for VP8 profiles 1-3 to dixie.

Change-Id: I76d45cf9843f6f7473783b7932af94f033eb6e82
This commit is contained in:
John Koleszar
2011-02-02 16:54:31 -05:00
parent e75a400cd3
commit def479510e
3 changed files with 294 additions and 45 deletions

View File

@@ -320,10 +320,6 @@ decode_frame(struct vp8_decoder_ctx *ctx,
vpx_internal_error(&ctx->error, VPX_CODEC_UNSUP_BITSTREAM, vpx_internal_error(&ctx->error, VPX_CODEC_UNSUP_BITSTREAM,
"Experimental bitstreams not supported."); "Experimental bitstreams not supported.");
if (ctx->frame_hdr.version != 0)
vpx_internal_error(&ctx->error, VPX_CODEC_UNSUP_BITSTREAM,
"Unsupported version %d", ctx->frame_hdr.version);
data += FRAME_HEADER_SZ; data += FRAME_HEADER_SZ;
sz -= FRAME_HEADER_SZ; sz -= FRAME_HEADER_SZ;

View File

@@ -56,6 +56,16 @@ high_edge_variance(unsigned char *pixels,
return ABS(p1 - p0) > hev_threshold || ABS(q1 - q0) > hev_threshold; return ABS(p1 - p0) > hev_threshold || ABS(q1 - q0) > hev_threshold;
} }
static int
simple_threshold(unsigned char *pixels,
int stride,
int filter_limit)
{
return (ABS(p0 - q0) * 2 + (ABS(p1 - q1) >> 1)) <= filter_limit;
}
static int static int
normal_threshold(unsigned char *pixels, normal_threshold(unsigned char *pixels,
int stride, int stride,
@@ -66,7 +76,7 @@ normal_threshold(unsigned char *pixels,
int I = interior_limit; int I = interior_limit;
/* Note: Deviates from spec */ /* Note: Deviates from spec */
return (ABS(p0 - q0) * 2 + (ABS(p1 - q1) >> 1)) <= 2 * E + I return simple_threshold(pixels, stride, 2 * E + I)
&& ABS(p3 - p2) <= I && ABS(p2 - p1) <= I && ABS(p1 - p0) <= I && ABS(p3 - p2) <= I && ABS(p2 - p1) <= I && ABS(p1 - p0) <= I
&& ABS(q3 - q2) <= I && ABS(q2 - q1) <= I && ABS(q1 - q0) <= I; && ABS(q3 - q2) <= I && ABS(q2 - q1) <= I && ABS(q1 - q0) <= I;
} }
@@ -223,6 +233,38 @@ filter_subblock_h_edge(unsigned char *src,
} }
static void
filter_v_edge_simple(unsigned char *src,
int stride,
int filter_limit)
{
int i;
for (i = 0; i < 16; i++)
{
if (simple_threshold(src, 1, filter_limit))
filter_common(src, 1, 1);
src += stride;
}
}
static void
filter_h_edge_simple(unsigned char *src,
int stride,
int filter_limit)
{
int i;
for (i = 0; i < 16; i++)
{
if (simple_threshold(src, stride, filter_limit))
filter_common(src, stride, 1);
src += 1;
}
}
static void static void
calculate_filter_parameters(struct vp8_decoder_ctx *ctx, calculate_filter_parameters(struct vp8_decoder_ctx *ctx,
struct mb_info *mbi, struct mb_info *mbi,
@@ -292,12 +334,12 @@ calculate_filter_parameters(struct vp8_decoder_ctx *ctx,
*hev_threshold_ = hev_threshold; *hev_threshold_ = hev_threshold;
} }
#include <assert.h>
void static void
vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx, filter_row_normal(struct vp8_decoder_ctx *ctx,
unsigned int row, unsigned int row,
unsigned int start_col, unsigned int start_col,
unsigned int num_cols) unsigned int num_cols)
{ {
unsigned char *y, *u, *v; unsigned char *y, *u, *v;
int stride, uv_stride; int stride, uv_stride;
@@ -390,3 +432,84 @@ vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
mbi++; mbi++;
} }
} }
static void
filter_row_simple(struct vp8_decoder_ctx *ctx,
unsigned int row,
unsigned int start_col,
unsigned int num_cols)
{
unsigned char *y;
int stride;
struct mb_info *mbi;
unsigned int col;
int i;
/* Adjust pointers based on row, start_col */
stride = ctx->ref_frames[CURRENT_FRAME]->img.stride[PLANE_Y];
y = ctx->ref_frames[CURRENT_FRAME]->img.planes[PLANE_Y];
y += (stride * row + start_col) * 16;
mbi = ctx->mb_info_rows[row] + start_col;
for (col = start_col; col < start_col + num_cols; col++)
{
int edge_limit, interior_limit, hev_threshold;
/* TODO: only need to recalculate every MB if segmentation is
* enabled.
*/
calculate_filter_parameters(ctx, mbi, &edge_limit, &interior_limit,
&hev_threshold);
if (edge_limit)
{
/* NOTE: This conditional is actually dependent on the number
* of coefficients decoded, not the skip flag as coded in the
* bitstream. The tokens task is expected to set 31 if there
* is *any* non-zero data.
*/
int filter_subblocks = (mbi->base.eob_mask
|| mbi->base.y_mode == SPLITMV || mbi->base.y_mode == B_PRED);
int mb_limit = (edge_limit + 2) * 2 + interior_limit;
int b_limit = edge_limit * 2 + interior_limit;
if (col)
filter_v_edge_simple(y, stride, mb_limit);
if (filter_subblocks)
{
filter_v_edge_simple(y + 4, stride, b_limit);
filter_v_edge_simple(y + 8, stride, b_limit);
filter_v_edge_simple(y + 12, stride, b_limit);
}
if (row)
filter_h_edge_simple(y, stride, mb_limit);
if (filter_subblocks)
{
filter_h_edge_simple(y + 4 * stride, stride, b_limit);
filter_h_edge_simple(y + 8 * stride, stride, b_limit);
filter_h_edge_simple(y + 12 * stride, stride, b_limit);
}
}
y += 16;
mbi++;
}
}
void
vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
unsigned int row,
unsigned int start_col,
unsigned int num_cols)
{
if(ctx->loopfilter_hdr.use_simple)
filter_row_simple(ctx, row, start_col, num_cols);
else
filter_row_normal(ctx, row, start_col, num_cols);
}

View File

@@ -33,6 +33,20 @@ static const short sixtap_filters[8][6] =
}; };
static const short bilinear_filters[8][6] =
{
{ 0, 0, 128, 0, 0, 0 },
{ 0, 0, 112, 16, 0, 0 },
{ 0, 0, 96, 32, 0, 0 },
{ 0, 0, 80, 48, 0, 0 },
{ 0, 0, 64, 64, 0, 0 },
{ 0, 0, 48, 80, 0, 0 },
{ 0, 0, 32, 96, 0, 0 },
{ 0, 0, 16, 112, 0, 0 }
};
static void static void
predict_h_nxn(unsigned char *predict, predict_h_nxn(unsigned char *predict,
int stride, int stride,
@@ -790,6 +804,72 @@ fourtap_vert(unsigned char *output,
} }
static void
twotap_horiz(unsigned char *output,
int output_stride,
const unsigned char *reference,
int reference_stride,
int cols,
int rows,
int mx,
int my
)
{
const short *filter_x = &bilinear_filters[mx][2];
const short *filter_y = &bilinear_filters[my][2];
int r, c, temp;
for (r = 0; r < rows; r++)
{
for (c = 0; c < cols; c++)
{
temp = (reference[ 0] * filter_x[0]) +
(reference[ 1] * filter_x[1]) +
64;
temp >>= 7;
output[c] = CLAMP_255(temp);
reference++;
}
reference += reference_stride - cols;
output += output_stride;
}
}
static void
twotap_vert(unsigned char *output,
int output_stride,
const unsigned char *reference,
int reference_stride,
int cols,
int rows,
int mx,
int my
)
{
const short *filter_x = &bilinear_filters[mx][2];
const short *filter_y = &bilinear_filters[my][2];
int r, c, temp;
for (r = 0; r < rows; r++)
{
for (c = 0; c < cols; c++)
{
temp = (reference[ 0*reference_stride] * filter_y[0]) +
(reference[ 1*reference_stride] * filter_y[1]) +
64;
temp >>= 7;
output[c] = CLAMP_255(temp);
reference++;
}
reference += reference_stride - cols;
output += output_stride;
}
}
#if 1 #if 1
#define DEFINE_FILTER(kind, w, h)\ #define DEFINE_FILTER(kind, w, h)\
static void kind##_h_##w##x##h##_c(const unsigned char *reference,\ static void kind##_h_##w##x##h##_c(const unsigned char *reference,\
@@ -885,12 +965,16 @@ fourtap_vert(unsigned char *output,
DEFINE_FILTER(sixtap, 16, 16) DEFINE_FILTER(sixtap, 16, 16)
DEFINE_FILTER(fourtap, 16, 16) DEFINE_FILTER(fourtap, 16, 16)
DEFINE_FILTER(twotap, 16, 16)
DEFINE_FILTER(sixtap, 8, 8) DEFINE_FILTER(sixtap, 8, 8)
DEFINE_FILTER(fourtap, 8, 8) DEFINE_FILTER(fourtap, 8, 8)
DEFINE_FILTER(twotap, 8, 8)
DEFINE_FILTER(sixtap, 8, 4) DEFINE_FILTER(sixtap, 8, 4)
DEFINE_FILTER(fourtap, 8, 4) DEFINE_FILTER(fourtap, 8, 4)
DEFINE_FILTER(twotap, 8, 4)
DEFINE_FILTER(sixtap, 4, 4) DEFINE_FILTER(sixtap, 4, 4)
DEFINE_FILTER(fourtap, 4, 4) DEFINE_FILTER(fourtap, 4, 4)
DEFINE_FILTER(twotap, 4, 4)
#define DEFINE_SPLIT_FILTER(w, h)\ #define DEFINE_SPLIT_FILTER(w, h)\
static void filter_6h4v_##w##x##h##_c(const unsigned char *reference,\ static void filter_6h4v_##w##x##h##_c(const unsigned char *reference,\
@@ -1278,6 +1362,7 @@ predict_inter_emulated_edge(struct vp8_decoder_ctx *ctx,
int w, h, x, y, b; int w, h, x, y, b;
union mv chroma_mv[4]; union mv chroma_mv[4];
unsigned char *u = img->u, *v = img->v; unsigned char *u = img->u, *v = img->v;
int full_pixel = ctx->frame_hdr.version == 3;
x = mb_col * 16; x = mb_col * 16;
@@ -1301,6 +1386,12 @@ predict_inter_emulated_edge(struct vp8_decoder_ctx *ctx,
uvmv = mbi->base.mv; uvmv = mbi->base.mv;
uvmv.d.x = (uvmv.d.x + 1 + (uvmv.d.x >> 31) * 2) / 2; uvmv.d.x = (uvmv.d.x + 1 + (uvmv.d.x >> 31) * 2) / 2;
uvmv.d.y = (uvmv.d.y + 1 + (uvmv.d.y >> 31) * 2) / 2; uvmv.d.y = (uvmv.d.y + 1 + (uvmv.d.y >> 31) * 2) / 2;
if (full_pixel)
{
uvmv.d.x &= ~7;
uvmv.d.y &= ~7;
}
chroma_mv[0] = uvmv; chroma_mv[0] = uvmv;
chroma_mv[1] = uvmv; chroma_mv[1] = uvmv;
chroma_mv[2] = uvmv; chroma_mv[2] = uvmv;
@@ -1308,8 +1399,6 @@ predict_inter_emulated_edge(struct vp8_decoder_ctx *ctx,
} }
else else
{ {
int full_pixel = ctx->frame_hdr.version == 3;
chroma_mv[0] = calculate_chroma_splitmv(mbi, 0, full_pixel); chroma_mv[0] = calculate_chroma_splitmv(mbi, 0, full_pixel);
chroma_mv[1] = calculate_chroma_splitmv(mbi, 2, full_pixel); chroma_mv[1] = calculate_chroma_splitmv(mbi, 2, full_pixel);
chroma_mv[2] = calculate_chroma_splitmv(mbi, 8, full_pixel); chroma_mv[2] = calculate_chroma_splitmv(mbi, 8, full_pixel);
@@ -1648,41 +1737,82 @@ vp8_dixie_predict_init(struct vp8_decoder_ctx *ctx)
} }
/* TODO: Move the rest of this out of this function */ /* TODO: Move the rest of this out of this function */
ctx->mc_functions[MC_16X16][MC_4H0V] = fourtap_h_16x16_c; if(ctx->frame_hdr.version)
ctx->mc_functions[MC_16X16][MC_6H0V] = sixtap_h_16x16_c; {
ctx->mc_functions[MC_16X16][MC_0H4V] = fourtap_v_16x16_c; ctx->mc_functions[MC_16X16][MC_4H0V] = twotap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H4V] = fourtap_hv_16x16_c; ctx->mc_functions[MC_16X16][MC_6H0V] = twotap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H4V] = filter_6h4v_16x16_c; ctx->mc_functions[MC_16X16][MC_0H4V] = twotap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H6V] = sixtap_v_16x16_c; ctx->mc_functions[MC_16X16][MC_4H4V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H6V] = filter_4h6v_16x16_c; ctx->mc_functions[MC_16X16][MC_6H4V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H6V] = sixtap_hv_16x16_c; ctx->mc_functions[MC_16X16][MC_0H6V] = twotap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H6V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H6V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_8X8][MC_4H0V] = fourtap_h_8x8_c; ctx->mc_functions[MC_8X8][MC_4H0V] = twotap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H0V] = sixtap_h_8x8_c; ctx->mc_functions[MC_8X8][MC_6H0V] = twotap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H4V] = fourtap_v_8x8_c; ctx->mc_functions[MC_8X8][MC_0H4V] = twotap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H4V] = fourtap_hv_8x8_c; ctx->mc_functions[MC_8X8][MC_4H4V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H4V] = filter_6h4v_8x8_c; ctx->mc_functions[MC_8X8][MC_6H4V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H6V] = sixtap_v_8x8_c; ctx->mc_functions[MC_8X8][MC_0H6V] = twotap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H6V] = filter_4h6v_8x8_c; ctx->mc_functions[MC_8X8][MC_4H6V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H6V] = sixtap_hv_8x8_c; ctx->mc_functions[MC_8X8][MC_6H6V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X4][MC_4H0V] = fourtap_h_8x4_c; ctx->mc_functions[MC_8X4][MC_4H0V] = twotap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H0V] = sixtap_h_8x4_c; ctx->mc_functions[MC_8X4][MC_6H0V] = twotap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H4V] = fourtap_v_8x4_c; ctx->mc_functions[MC_8X4][MC_0H4V] = twotap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H4V] = fourtap_hv_8x4_c; ctx->mc_functions[MC_8X4][MC_4H4V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H4V] = filter_6h4v_8x4_c; ctx->mc_functions[MC_8X4][MC_6H4V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H6V] = sixtap_v_8x4_c; ctx->mc_functions[MC_8X4][MC_0H6V] = twotap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H6V] = filter_4h6v_8x4_c; ctx->mc_functions[MC_8X4][MC_4H6V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H6V] = sixtap_hv_8x4_c; ctx->mc_functions[MC_8X4][MC_6H6V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_4X4][MC_4H0V] = fourtap_h_4x4_c; ctx->mc_functions[MC_4X4][MC_4H0V] = twotap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H0V] = sixtap_h_4x4_c; ctx->mc_functions[MC_4X4][MC_6H0V] = twotap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H4V] = fourtap_v_4x4_c; ctx->mc_functions[MC_4X4][MC_0H4V] = twotap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H4V] = fourtap_hv_4x4_c; ctx->mc_functions[MC_4X4][MC_4H4V] = twotap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H4V] = filter_6h4v_4x4_c; ctx->mc_functions[MC_4X4][MC_6H4V] = twotap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H6V] = sixtap_v_4x4_c; ctx->mc_functions[MC_4X4][MC_0H6V] = twotap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H6V] = filter_4h6v_4x4_c; ctx->mc_functions[MC_4X4][MC_4H6V] = twotap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H6V] = sixtap_hv_4x4_c; ctx->mc_functions[MC_4X4][MC_6H6V] = twotap_hv_4x4_c;
}
else
{
ctx->mc_functions[MC_16X16][MC_4H0V] = fourtap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H0V] = sixtap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H4V] = fourtap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H4V] = fourtap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H4V] = filter_6h4v_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H6V] = sixtap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H6V] = filter_4h6v_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H6V] = sixtap_hv_16x16_c;
ctx->mc_functions[MC_8X8][MC_4H0V] = fourtap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H0V] = sixtap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H4V] = fourtap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H4V] = fourtap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H4V] = filter_6h4v_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H6V] = sixtap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H6V] = filter_4h6v_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H6V] = sixtap_hv_8x8_c;
ctx->mc_functions[MC_8X4][MC_4H0V] = fourtap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H0V] = sixtap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H4V] = fourtap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H4V] = fourtap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H4V] = filter_6h4v_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H6V] = sixtap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H6V] = filter_4h6v_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H6V] = sixtap_hv_8x4_c;
ctx->mc_functions[MC_4X4][MC_4H0V] = fourtap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H0V] = sixtap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H4V] = fourtap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H4V] = fourtap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H4V] = filter_6h4v_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H6V] = sixtap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H6V] = filter_4h6v_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H6V] = sixtap_hv_4x4_c;
}
} }
/* Find a free framebuffer to predict into */ /* Find a free framebuffer to predict into */