dixie: support for simple lf, bilinear subpix

Initial support for simple loopfilter and bilinear subpixel motion
interpolation. This adds support for VP8 profiles 1-3 to dixie.

Change-Id: I76d45cf9843f6f7473783b7932af94f033eb6e82
This commit is contained in:
John Koleszar
2011-02-02 16:54:31 -05:00
parent e75a400cd3
commit def479510e
3 changed files with 294 additions and 45 deletions

View File

@@ -320,10 +320,6 @@ decode_frame(struct vp8_decoder_ctx *ctx,
vpx_internal_error(&ctx->error, VPX_CODEC_UNSUP_BITSTREAM,
"Experimental bitstreams not supported.");
if (ctx->frame_hdr.version != 0)
vpx_internal_error(&ctx->error, VPX_CODEC_UNSUP_BITSTREAM,
"Unsupported version %d", ctx->frame_hdr.version);
data += FRAME_HEADER_SZ;
sz -= FRAME_HEADER_SZ;

View File

@@ -56,6 +56,16 @@ high_edge_variance(unsigned char *pixels,
return ABS(p1 - p0) > hev_threshold || ABS(q1 - q0) > hev_threshold;
}
static int
simple_threshold(unsigned char *pixels,
int stride,
int filter_limit)
{
return (ABS(p0 - q0) * 2 + (ABS(p1 - q1) >> 1)) <= filter_limit;
}
static int
normal_threshold(unsigned char *pixels,
int stride,
@@ -66,7 +76,7 @@ normal_threshold(unsigned char *pixels,
int I = interior_limit;
/* Note: Deviates from spec */
return (ABS(p0 - q0) * 2 + (ABS(p1 - q1) >> 1)) <= 2 * E + I
return simple_threshold(pixels, stride, 2 * E + I)
&& ABS(p3 - p2) <= I && ABS(p2 - p1) <= I && ABS(p1 - p0) <= I
&& ABS(q3 - q2) <= I && ABS(q2 - q1) <= I && ABS(q1 - q0) <= I;
}
@@ -223,6 +233,38 @@ filter_subblock_h_edge(unsigned char *src,
}
static void
filter_v_edge_simple(unsigned char *src,
int stride,
int filter_limit)
{
int i;
for (i = 0; i < 16; i++)
{
if (simple_threshold(src, 1, filter_limit))
filter_common(src, 1, 1);
src += stride;
}
}
static void
filter_h_edge_simple(unsigned char *src,
int stride,
int filter_limit)
{
int i;
for (i = 0; i < 16; i++)
{
if (simple_threshold(src, stride, filter_limit))
filter_common(src, stride, 1);
src += 1;
}
}
static void
calculate_filter_parameters(struct vp8_decoder_ctx *ctx,
struct mb_info *mbi,
@@ -292,12 +334,12 @@ calculate_filter_parameters(struct vp8_decoder_ctx *ctx,
*hev_threshold_ = hev_threshold;
}
#include <assert.h>
void
vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
unsigned int row,
unsigned int start_col,
unsigned int num_cols)
static void
filter_row_normal(struct vp8_decoder_ctx *ctx,
unsigned int row,
unsigned int start_col,
unsigned int num_cols)
{
unsigned char *y, *u, *v;
int stride, uv_stride;
@@ -390,3 +432,84 @@ vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
mbi++;
}
}
static void
filter_row_simple(struct vp8_decoder_ctx *ctx,
unsigned int row,
unsigned int start_col,
unsigned int num_cols)
{
unsigned char *y;
int stride;
struct mb_info *mbi;
unsigned int col;
int i;
/* Adjust pointers based on row, start_col */
stride = ctx->ref_frames[CURRENT_FRAME]->img.stride[PLANE_Y];
y = ctx->ref_frames[CURRENT_FRAME]->img.planes[PLANE_Y];
y += (stride * row + start_col) * 16;
mbi = ctx->mb_info_rows[row] + start_col;
for (col = start_col; col < start_col + num_cols; col++)
{
int edge_limit, interior_limit, hev_threshold;
/* TODO: only need to recalculate every MB if segmentation is
* enabled.
*/
calculate_filter_parameters(ctx, mbi, &edge_limit, &interior_limit,
&hev_threshold);
if (edge_limit)
{
/* NOTE: This conditional is actually dependent on the number
* of coefficients decoded, not the skip flag as coded in the
* bitstream. The tokens task is expected to set 31 if there
* is *any* non-zero data.
*/
int filter_subblocks = (mbi->base.eob_mask
|| mbi->base.y_mode == SPLITMV || mbi->base.y_mode == B_PRED);
int mb_limit = (edge_limit + 2) * 2 + interior_limit;
int b_limit = edge_limit * 2 + interior_limit;
if (col)
filter_v_edge_simple(y, stride, mb_limit);
if (filter_subblocks)
{
filter_v_edge_simple(y + 4, stride, b_limit);
filter_v_edge_simple(y + 8, stride, b_limit);
filter_v_edge_simple(y + 12, stride, b_limit);
}
if (row)
filter_h_edge_simple(y, stride, mb_limit);
if (filter_subblocks)
{
filter_h_edge_simple(y + 4 * stride, stride, b_limit);
filter_h_edge_simple(y + 8 * stride, stride, b_limit);
filter_h_edge_simple(y + 12 * stride, stride, b_limit);
}
}
y += 16;
mbi++;
}
}
void
vp8_dixie_loopfilter_process_row(struct vp8_decoder_ctx *ctx,
unsigned int row,
unsigned int start_col,
unsigned int num_cols)
{
if(ctx->loopfilter_hdr.use_simple)
filter_row_simple(ctx, row, start_col, num_cols);
else
filter_row_normal(ctx, row, start_col, num_cols);
}

View File

@@ -33,6 +33,20 @@ static const short sixtap_filters[8][6] =
};
static const short bilinear_filters[8][6] =
{
{ 0, 0, 128, 0, 0, 0 },
{ 0, 0, 112, 16, 0, 0 },
{ 0, 0, 96, 32, 0, 0 },
{ 0, 0, 80, 48, 0, 0 },
{ 0, 0, 64, 64, 0, 0 },
{ 0, 0, 48, 80, 0, 0 },
{ 0, 0, 32, 96, 0, 0 },
{ 0, 0, 16, 112, 0, 0 }
};
static void
predict_h_nxn(unsigned char *predict,
int stride,
@@ -790,6 +804,72 @@ fourtap_vert(unsigned char *output,
}
static void
twotap_horiz(unsigned char *output,
int output_stride,
const unsigned char *reference,
int reference_stride,
int cols,
int rows,
int mx,
int my
)
{
const short *filter_x = &bilinear_filters[mx][2];
const short *filter_y = &bilinear_filters[my][2];
int r, c, temp;
for (r = 0; r < rows; r++)
{
for (c = 0; c < cols; c++)
{
temp = (reference[ 0] * filter_x[0]) +
(reference[ 1] * filter_x[1]) +
64;
temp >>= 7;
output[c] = CLAMP_255(temp);
reference++;
}
reference += reference_stride - cols;
output += output_stride;
}
}
static void
twotap_vert(unsigned char *output,
int output_stride,
const unsigned char *reference,
int reference_stride,
int cols,
int rows,
int mx,
int my
)
{
const short *filter_x = &bilinear_filters[mx][2];
const short *filter_y = &bilinear_filters[my][2];
int r, c, temp;
for (r = 0; r < rows; r++)
{
for (c = 0; c < cols; c++)
{
temp = (reference[ 0*reference_stride] * filter_y[0]) +
(reference[ 1*reference_stride] * filter_y[1]) +
64;
temp >>= 7;
output[c] = CLAMP_255(temp);
reference++;
}
reference += reference_stride - cols;
output += output_stride;
}
}
#if 1
#define DEFINE_FILTER(kind, w, h)\
static void kind##_h_##w##x##h##_c(const unsigned char *reference,\
@@ -885,12 +965,16 @@ fourtap_vert(unsigned char *output,
DEFINE_FILTER(sixtap, 16, 16)
DEFINE_FILTER(fourtap, 16, 16)
DEFINE_FILTER(twotap, 16, 16)
DEFINE_FILTER(sixtap, 8, 8)
DEFINE_FILTER(fourtap, 8, 8)
DEFINE_FILTER(twotap, 8, 8)
DEFINE_FILTER(sixtap, 8, 4)
DEFINE_FILTER(fourtap, 8, 4)
DEFINE_FILTER(twotap, 8, 4)
DEFINE_FILTER(sixtap, 4, 4)
DEFINE_FILTER(fourtap, 4, 4)
DEFINE_FILTER(twotap, 4, 4)
#define DEFINE_SPLIT_FILTER(w, h)\
static void filter_6h4v_##w##x##h##_c(const unsigned char *reference,\
@@ -1278,6 +1362,7 @@ predict_inter_emulated_edge(struct vp8_decoder_ctx *ctx,
int w, h, x, y, b;
union mv chroma_mv[4];
unsigned char *u = img->u, *v = img->v;
int full_pixel = ctx->frame_hdr.version == 3;
x = mb_col * 16;
@@ -1301,6 +1386,12 @@ predict_inter_emulated_edge(struct vp8_decoder_ctx *ctx,
uvmv = mbi->base.mv;
uvmv.d.x = (uvmv.d.x + 1 + (uvmv.d.x >> 31) * 2) / 2;
uvmv.d.y = (uvmv.d.y + 1 + (uvmv.d.y >> 31) * 2) / 2;
if (full_pixel)
{
uvmv.d.x &= ~7;
uvmv.d.y &= ~7;
}
chroma_mv[0] = uvmv;
chroma_mv[1] = uvmv;
chroma_mv[2] = uvmv;
@@ -1308,8 +1399,6 @@ predict_inter_emulated_edge(struct vp8_decoder_ctx *ctx,
}
else
{
int full_pixel = ctx->frame_hdr.version == 3;
chroma_mv[0] = calculate_chroma_splitmv(mbi, 0, full_pixel);
chroma_mv[1] = calculate_chroma_splitmv(mbi, 2, full_pixel);
chroma_mv[2] = calculate_chroma_splitmv(mbi, 8, full_pixel);
@@ -1648,41 +1737,82 @@ vp8_dixie_predict_init(struct vp8_decoder_ctx *ctx)
}
/* TODO: Move the rest of this out of this function */
ctx->mc_functions[MC_16X16][MC_4H0V] = fourtap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H0V] = sixtap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H4V] = fourtap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H4V] = fourtap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H4V] = filter_6h4v_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H6V] = sixtap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H6V] = filter_4h6v_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H6V] = sixtap_hv_16x16_c;
if(ctx->frame_hdr.version)
{
ctx->mc_functions[MC_16X16][MC_4H0V] = twotap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H0V] = twotap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H4V] = twotap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H4V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H4V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H6V] = twotap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H6V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H6V] = twotap_hv_16x16_c;
ctx->mc_functions[MC_8X8][MC_4H0V] = fourtap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H0V] = sixtap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H4V] = fourtap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H4V] = fourtap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H4V] = filter_6h4v_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H6V] = sixtap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H6V] = filter_4h6v_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H6V] = sixtap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H0V] = twotap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H0V] = twotap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H4V] = twotap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H4V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H4V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H6V] = twotap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H6V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H6V] = twotap_hv_8x8_c;
ctx->mc_functions[MC_8X4][MC_4H0V] = fourtap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H0V] = sixtap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H4V] = fourtap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H4V] = fourtap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H4V] = filter_6h4v_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H6V] = sixtap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H6V] = filter_4h6v_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H6V] = sixtap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H0V] = twotap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H0V] = twotap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H4V] = twotap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H4V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H4V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H6V] = twotap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H6V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H6V] = twotap_hv_8x4_c;
ctx->mc_functions[MC_4X4][MC_4H0V] = fourtap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H0V] = sixtap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H4V] = fourtap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H4V] = fourtap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H4V] = filter_6h4v_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H6V] = sixtap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H6V] = filter_4h6v_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H6V] = sixtap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H0V] = twotap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H0V] = twotap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H4V] = twotap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H4V] = twotap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H4V] = twotap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H6V] = twotap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H6V] = twotap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H6V] = twotap_hv_4x4_c;
}
else
{
ctx->mc_functions[MC_16X16][MC_4H0V] = fourtap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H0V] = sixtap_h_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H4V] = fourtap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H4V] = fourtap_hv_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H4V] = filter_6h4v_16x16_c;
ctx->mc_functions[MC_16X16][MC_0H6V] = sixtap_v_16x16_c;
ctx->mc_functions[MC_16X16][MC_4H6V] = filter_4h6v_16x16_c;
ctx->mc_functions[MC_16X16][MC_6H6V] = sixtap_hv_16x16_c;
ctx->mc_functions[MC_8X8][MC_4H0V] = fourtap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H0V] = sixtap_h_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H4V] = fourtap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H4V] = fourtap_hv_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H4V] = filter_6h4v_8x8_c;
ctx->mc_functions[MC_8X8][MC_0H6V] = sixtap_v_8x8_c;
ctx->mc_functions[MC_8X8][MC_4H6V] = filter_4h6v_8x8_c;
ctx->mc_functions[MC_8X8][MC_6H6V] = sixtap_hv_8x8_c;
ctx->mc_functions[MC_8X4][MC_4H0V] = fourtap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H0V] = sixtap_h_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H4V] = fourtap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H4V] = fourtap_hv_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H4V] = filter_6h4v_8x4_c;
ctx->mc_functions[MC_8X4][MC_0H6V] = sixtap_v_8x4_c;
ctx->mc_functions[MC_8X4][MC_4H6V] = filter_4h6v_8x4_c;
ctx->mc_functions[MC_8X4][MC_6H6V] = sixtap_hv_8x4_c;
ctx->mc_functions[MC_4X4][MC_4H0V] = fourtap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H0V] = sixtap_h_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H4V] = fourtap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H4V] = fourtap_hv_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H4V] = filter_6h4v_4x4_c;
ctx->mc_functions[MC_4X4][MC_0H6V] = sixtap_v_4x4_c;
ctx->mc_functions[MC_4X4][MC_4H6V] = filter_4h6v_4x4_c;
ctx->mc_functions[MC_4X4][MC_6H6V] = sixtap_hv_4x4_c;
}
}
/* Find a free framebuffer to predict into */