diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h index 429747063..3418e3655 100644 --- a/libmkv/EbmlIDs.h +++ b/libmkv/EbmlIDs.h @@ -120,7 +120,7 @@ enum mkv //video Video = 0xE0, FlagInterlaced = 0x9A, -// StereoMode = 0x53B8, + StereoMode = 0x53B8, PixelWidth = 0xB0, PixelHeight = 0xBA, PixelCropBottom = 0x54AA, diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index 2dee21191..c457312f4 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -138,25 +138,25 @@ void vp8_setup_version(VP8_COMMON *cm) { case 0: cm->no_lpf = 0; - cm->simpler_lpf = 0; + cm->filter_type = NORMAL_LOOPFILTER; cm->use_bilinear_mc_filter = 0; cm->full_pixel = 0; break; case 1: cm->no_lpf = 0; - cm->simpler_lpf = 1; + cm->filter_type = SIMPLE_LOOPFILTER; cm->use_bilinear_mc_filter = 1; cm->full_pixel = 0; break; case 2: cm->no_lpf = 1; - cm->simpler_lpf = 0; + cm->filter_type = NORMAL_LOOPFILTER; cm->use_bilinear_mc_filter = 1; cm->full_pixel = 0; break; case 3: cm->no_lpf = 1; - cm->simpler_lpf = 1; + cm->filter_type = SIMPLE_LOOPFILTER; cm->use_bilinear_mc_filter = 1; cm->full_pixel = 1; break; @@ -171,7 +171,7 @@ void vp8_create_common(VP8_COMMON *oci) oci->mb_no_coeff_skip = 1; oci->no_lpf = 0; - oci->simpler_lpf = 0; + oci->filter_type = NORMAL_LOOPFILTER; oci->use_bilinear_mc_filter = 0; oci->full_pixel = 0; oci->multi_token_partition = ONE_PARTITION; diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c index 3532a0356..6d1caa485 100644 --- a/vp8/common/arm/loopfilter_arm.c +++ b/vp8/common/arm/loopfilter_arm.c @@ -38,9 +38,8 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon; /*ARMV6 loopfilter functions*/ /* Horizontal MB filtering */ void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; 
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -51,20 +50,18 @@ void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig } void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -75,20 +72,18 @@ void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig } void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); 
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -101,12 +96,11 @@ void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign } void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -114,9 +108,8 @@ void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig /* Vertical B Filtering */ void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -129,12 +122,11 @@ void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign } void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); 
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -145,9 +137,8 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig /* NEON loopfilter functions */ /* Horizontal MB filtering */ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -155,20 +146,18 @@ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign } void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -176,20 +165,18 @@ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign } void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; 
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -199,12 +186,11 @@ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigne } void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -212,9 +198,8 @@ void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign /* Vertical B Filtering */ void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); 
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -224,12 +209,11 @@ void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigne } void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 3c3592aab..88bd59592 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -171,9 +171,7 @@ typedef struct unsigned char partitioning; unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ - unsigned char dc_diff; unsigned char need_to_clamp_mvs; - unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */ } MB_MODE_INFO; diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index 37c5b7740..a3242716f 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -25,9 +25,8 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c); /* Horizontal MB filtering */ void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -38,20 +37,18 @@ void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned } void 
vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -62,20 +59,18 @@ void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned } void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -88,12 +83,11 @@ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c } void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char 
*v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -101,9 +95,8 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned /* Vertical B Filtering */ void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -116,12 +109,11 @@ void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c } void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -353,6 +345,9 @@ void vp8_loop_filter_frame for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { int Segment = (alt_flt_enabled) ? 
mbd->mode_info_context->mbmi.segment_id : 0; + int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED && + mbd->mode_info_context->mbmi.mode != SPLITMV && + mbd->mode_info_context->mbmi.mb_skip_coeff); filter_level = baseline_filter_level[Segment]; @@ -365,17 +360,17 @@ void vp8_loop_filter_frame if (filter_level) { if (mb_col > 0) - cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); + cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]); - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); + if (!skip_lf) + cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]); /* don't apply across umv border */ if (mb_row > 0) - cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); + cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]); - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); + if (!skip_lf) + cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level]); } y_ptr += 16; @@ -457,6 +452,10 @@ void vp8_loop_filter_frame_yonly for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { int Segment = (alt_flt_enabled) ? 
mbd->mode_info_context->mbmi.segment_id : 0; + int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED && + mbd->mode_info_context->mbmi.mode != SPLITMV && + mbd->mode_info_context->mbmi.mb_skip_coeff); + filter_level = baseline_filter_level[Segment]; /* Apply any context driven MB level adjustment */ @@ -465,17 +464,17 @@ void vp8_loop_filter_frame_yonly if (filter_level) { if (mb_col > 0) - cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + if (!skip_lf) + cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); /* don't apply across umv border */ if (mb_row > 0) - cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + if (!skip_lf) + cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); } y_ptr += 16; @@ -565,20 +564,24 @@ void vp8_loop_filter_partial_frame for (mb_col = 0; mb_col < mb_cols; mb_col++) { int Segment = (alt_flt_enabled) ? 
mbd->mode_info_context->mbmi.segment_id : 0; + int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED && + mbd->mode_info_context->mbmi.mode != SPLITMV && + mbd->mode_info_context->mbmi.mb_skip_coeff); + filter_level = baseline_filter_level[Segment]; if (filter_level) { if (mb_col > 0) - cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + if (!skip_lf) + cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); - cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); + if (!skip_lf) + cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level]); } y_ptr += 16; diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h index 2e5997c73..ca136b3a4 100644 --- a/vp8/common/loopfilter.h +++ b/vp8/common/loopfilter.h @@ -41,7 +41,7 @@ typedef struct #define prototype_loopfilter_block(sym) \ void sym(unsigned char *y, unsigned char *u, unsigned char *v,\ - int ystride, int uv_stride, loop_filter_info *lfi, int simpler) + int ystride, int uv_stride, loop_filter_info *lfi) #if ARCH_X86 || ARCH_X86_64 #include "x86/loopfilter_x86.h" diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 426b8fc2b..a05951933 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -109,6 +109,7 @@ extern "C" int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 int Sharpness; // parameter used for sharpening output: recommendation 0: int cpu_used; + unsigned int rc_max_intra_bitrate_pct; // mode -> //(0)=Realtime/Live Encoding. 
This mode is optimized for realtim encoding (for example, capturing @@ -139,8 +140,9 @@ extern "C" int end_usage; // vbr or cbr - // shoot to keep buffer full at all times by undershooting a bit 95 recommended + // buffer targeting aggressiveness int under_shoot_pct; + int over_shoot_pct; // buffering parameters int starting_buffer_level; // in seconds diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index a91cb337b..e67d39cbb 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -120,7 +120,6 @@ typedef struct VP8Common int experimental; int mb_no_coeff_skip; int no_lpf; - int simpler_lpf; int use_bilinear_mc_filter; int full_pixel; diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index 5bfc7d6fb..660880b52 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -804,11 +804,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t for (j = 0; j < mb_cols; j++) { char zz[4]; + int dc_diff = !(mi[mb_index].mbmi.mode != B_PRED && + mi[mb_index].mbmi.mode != SPLITMV && + mi[mb_index].mbmi.mb_skip_coeff); if (oci->frame_type == KEY_FRAME) sprintf(zz, "a"); else - sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0'); + sprintf(zz, "%c", dc_diff + '0'); vp8_blit_text(zz, y_ptr, post->y_stride); mb_index ++; diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c index e602feedc..71bf6e2d7 100644 --- a/vp8/common/ppc/loopfilter_altivec.c +++ b/vp8/common/ppc/loopfilter_altivec.c @@ -53,9 +53,8 @@ loop_filter_function_s_ppc loop_filter_simple_vertical_edge_ppc; // Horizontal MB filtering void loop_filter_mbh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); if (u_ptr) @@ -63,9 +62,8 @@ void loop_filter_mbh_ppc(unsigned
char *y_ptr, unsigned char *u_ptr, unsigned ch } void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; (void)u_ptr; (void)v_ptr; (void)uv_stride; @@ -74,9 +72,8 @@ void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c // Vertical MB Filtering void loop_filter_mbv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); if (u_ptr) @@ -84,9 +81,8 @@ void loop_filter_mbv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned ch } void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; (void)u_ptr; (void)v_ptr; (void)uv_stride; @@ -95,9 +91,8 @@ void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned c // Horizontal B Filtering void loop_filter_bh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; // These should all be done at once with one call, instead of 3 loop_filter_horizontal_edge_y_ppc(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); loop_filter_horizontal_edge_y_ppc(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); @@ -108,9 +103,8 @@ void loop_filter_bh_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned cha } void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int 
y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; (void)u_ptr; (void)v_ptr; (void)uv_stride; @@ -121,9 +115,8 @@ void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned ch // Vertical B Filtering void loop_filter_bv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; loop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->flim, lfi->lim, lfi->thr); if (u_ptr) @@ -131,9 +124,8 @@ void loop_filter_bv_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned cha } void loop_filter_bvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void)simpler_lpf; (void)u_ptr; (void)v_ptr; (void)uv_stride; diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c index 5837bc0dc..a52420c98 100644 --- a/vp8/common/x86/loopfilter_x86.c +++ b/vp8/common/x86/loopfilter_x86.c @@ -42,9 +42,8 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; #if HAVE_MMX /* Horizontal MB filtering */ void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -56,21 +55,19 @@ void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, 
loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -82,21 +79,19 @@ void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -110,12 +105,11 @@ void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) 
uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -124,9 +118,8 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne /* Vertical B Filtering */ void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -140,12 +133,11 @@ void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -156,9 +148,8 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne /* Horizontal MB filtering */ #if HAVE_SSE2 void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int 
simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -167,21 +158,19 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) @@ -190,21 +179,19 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, 
y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -215,12 +202,11 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -229,9 +215,8 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign /* Vertical B Filtering */ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { - (void) simpler_lpf; vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); @@ -242,12 +227,11 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) + int y_stride, int uv_stride, loop_filter_info *lfi) { (void) u_ptr; (void) v_ptr; (void) uv_stride; - (void) simpler_lpf; vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, 
lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index be4cab281..3df0ba73a 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -111,9 +111,8 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) */ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) { - if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp8_build_intra_predictors_mbuv_s(xd); RECON_INVOKE(&pbi->common.rtcd.recon, build_intra_predictors_mby_s)(xd); @@ -195,11 +194,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd) clamp_mvs(xd); } - xd->mode_info_context->mbmi.dc_diff = 1; - - if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0) + eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV); + if (!eobtotal) { - xd->mode_info_context->mbmi.dc_diff = 0; skip_recon_mb(pbi, xd); return; } @@ -208,7 +206,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd) mb_init_dequantizer(pbi, xd); /* do prediction */ - if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp8_build_intra_predictors_mbuv(xd); @@ -255,7 +253,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd) xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs, xd->block[24].diff); } - else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED) + else if (xd->mode_info_context->mbmi.mode == B_PRED) { for (i = 0; i < 16; i++) { diff --git 
a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 57af07925..06109e125 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -108,12 +108,10 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m clamp_mvs(xd); } - xd->mode_info_context->mbmi.dc_diff = 1; - - if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0) + eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV); + if (!eobtotal) { - xd->mode_info_context->mbmi.dc_diff = 0; - /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/ if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { @@ -322,6 +320,7 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) if (pbi->common.filter_level) { + int skip_lf; if( mb_row != pc->mb_rows-1 ) { /* Save decoded MB last row data for next-row decoding */ @@ -349,6 +348,10 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) /* update loopfilter info */ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0; + skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV && + xd->mode_info_context->mbmi.mb_skip_coeff); + filter_level = pbi->mt_baseline_filter_level[Segment]; /* Distance of Mb to the various image edges. 
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units @@ -360,17 +363,17 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) if (filter_level) { if (mb_col > 0) - pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); - if (xd->mode_info_context->mbmi.dc_diff > 0) - pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + if (!skip_lf) + pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); /* don't apply across umv border */ if (mb_row > 0) - pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); - if (xd->mode_info_context->mbmi.dc_diff > 0) - pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + if (!skip_lf) + pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); } } @@ -810,6 +813,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) if (pbi->common.filter_level) { + int skip_lf; /* Save decoded MB last row data for next-row decoding */ if(mb_row != pc->mb_rows-1) { @@ -837,6 +841,9 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) /* update loopfilter info */ Segment = (alt_flt_enabled) ? 
xd->mode_info_context->mbmi.segment_id : 0; + skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV && + xd->mode_info_context->mbmi.mb_skip_coeff); filter_level = pbi->mt_baseline_filter_level[Segment]; /* Distance of Mb to the various image edges. * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units @@ -848,17 +855,17 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) if (filter_level) { if (mb_col > 0) - pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); - if (xd->mode_info_context->mbmi.dc_diff > 0) - pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + if (!skip_lf) + pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); /* don't apply across umv border */ if (mb_row > 0) - pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); - if (xd->mode_info_context->mbmi.dc_diff > 0) - pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + if (!skip_lf) + pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level]); } } diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index a343ef769..98bd983a7 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1538,11 +1538,6 @@ int vp8cx_encode_inter_macroblock { if (cpi->common.mb_no_coeff_skip) { - if 
(xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) - xd->mode_info_context->mbmi.dc_diff = 0; - else - xd->mode_info_context->mbmi.dc_diff = 1; - xd->mode_info_context->mbmi.mb_skip_coeff = 1; cpi->skip_true_count ++; vp8_fix_contexts(xd); diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 5185c0edf..8d1429e0b 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1525,10 +1525,6 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; // Initialise the starting buffer levels - cpi->oxcf.starting_buffer_level = - rescale(cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); - cpi->buffer_level = cpi->oxcf.starting_buffer_level; cpi->bits_off_target = cpi->oxcf.starting_buffer_level; @@ -1701,6 +1697,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) // Convert target bandwidth from Kbit/s to Bit/s cpi->oxcf.target_bandwidth *= 1000; + cpi->oxcf.starting_buffer_level = + rescale(cpi->oxcf.starting_buffer_level, + cpi->oxcf.target_bandwidth, 1000); + // Set or reset optimal and maximum buffer levels. 
if (cpi->oxcf.optimal_buffer_level == 0) cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; @@ -1750,8 +1750,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) // Only allow dropped frames in buffered mode cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode; - cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type; - if (!cm->use_bilinear_mc_filter) cm->mcomp_filter_type = SIXTAP; else @@ -2726,16 +2724,17 @@ static int pick_frame_size(VP8_COMP *cpi) if (cpi->pass == 2) vp8_calc_auto_iframe_target_size(cpi); - // 1 Pass there is no information on which to base size so use bandwidth per second * fixed fraction else #endif - cpi->this_frame_target = cpi->oxcf.target_bandwidth / 2; - - // in error resilient mode the first frame is bigger since it likely contains - // all the static background - if (cpi->oxcf.error_resilient_mode == 1 || (cpi->compressor_speed == 2)) { - cpi->this_frame_target *= 3; // 5; + /* 1 Pass there is no information on which to base size so use + * bandwidth per second * fraction of the initial buffer + * level + */ + cpi->this_frame_target = cpi->oxcf.starting_buffer_level / 2; + + if(cpi->this_frame_target > cpi->oxcf.target_bandwidth * 3 / 2) + cpi->this_frame_target = cpi->oxcf.target_bandwidth * 3 / 2; } // Key frame from VFW/auto-keyframe/first frame @@ -2769,6 +2768,19 @@ static int pick_frame_size(VP8_COMP *cpi) } } + /* Apply limits on keyframe target. 
+ * + * TODO: move this after consolidating + * vp8_calc_iframe_target_size() and vp8_calc_auto_iframe_target_size() + */ + if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_max_intra_bitrate_pct) + { + unsigned int max_rate = cpi->av_per_frame_bandwidth + * cpi->oxcf.rc_max_intra_bitrate_pct / 100; + + if (cpi->this_frame_target > max_rate) + cpi->this_frame_target = max_rate; + } return 1; } @@ -5274,35 +5286,6 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const } -static int calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd) -{ - int i, j; - int Total = 0; - - unsigned char *src = source->y_buffer; - unsigned char *dst = dest->y_buffer; - (void)rtcd; - - // Loop through the Y plane raw and reconstruction data summing (square differences) - for (i = 0; i < source->y_height; i += 16) - { - for (j = 0; j < source->y_width; j += 16) - { - unsigned int sse; - VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse); - - if (sse < 8096) - Total += sse; - } - - src += 16 * source->y_stride; - dst += 16 * dest->y_stride; - } - - return Total; -} - - int vp8_get_quantizer(VP8_PTR c) { VP8_COMP *cpi = (VP8_COMP *) c; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 0e4a8c142..57cf4c000 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -510,7 +510,6 @@ typedef struct int auto_adjust_key_quantizer; int keyquantizer; int auto_worst_q; - int filter_type; int cpu_used; int chroma_boost; int horiz_scale; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index c56593e0b..765ff26f6 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -938,7 +938,6 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re best_mbmode.uv_mode = 0; best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 
1 : 0; best_mbmode.partitioning = 0; - best_mbmode.dc_diff = 0; vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 8d639ee28..fa9f04b15 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -707,8 +707,6 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) int min_frame_target; int Adjustment; - // Set the min frame bandwidth. - //min_frame_target = estimate_min_frame_size( cpi ); min_frame_target = 0; if (cpi->pass == 2) @@ -862,11 +860,6 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) } } - // Set a reduced data rate target for our initial Q calculation. - // This should help to save bits during earier sections. - if ((cpi->oxcf.under_shoot_pct > 0) && (cpi->oxcf.under_shoot_pct <= 100)) - cpi->this_frame_target = (cpi->this_frame_target * cpi->oxcf.under_shoot_pct) / 100; - // Sanity check that the total sum of adjustments is not above the maximum allowed // That is that having allowed for KF and GF penalties we have not pushed the // current interframe target to low. If the adjustment we apply here is not capable of recovering @@ -903,11 +896,6 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) percent_low = (cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / one_percent_bits; - - if (percent_low > 100) - percent_low = 100; - else if (percent_low < 0) - percent_low = 0; } // Are we overshooting the long term clip data rate... else if (cpi->bits_off_target < 0) @@ -915,16 +903,16 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) // Adjust per frame data target downwards to compensate. 
percent_low = (int)(100 * -cpi->bits_off_target / (cpi->total_byte_count * 8)); - - if (percent_low > 100) - percent_low = 100; - else if (percent_low < 0) - percent_low = 0; } + if (percent_low > cpi->oxcf.under_shoot_pct) + percent_low = cpi->oxcf.under_shoot_pct; + else if (percent_low < 0) + percent_low = 0; + // lower the target bandwidth for this frame. - cpi->this_frame_target = - (cpi->this_frame_target * (100 - (percent_low / 2))) / 100; + cpi->this_frame_target -= (cpi->this_frame_target * percent_low) + / 200; // Are we using allowing control of active_worst_allowed_q // according to buffer level. @@ -995,20 +983,29 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) } else { - int percent_high; + int percent_high = 0; - if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) + if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)) { - percent_high = (int)(100 * (cpi->bits_off_target - cpi->oxcf.optimal_buffer_level) / (cpi->total_byte_count * 8)); - - if (percent_high > 100) - percent_high = 100; - else if (percent_high < 0) - percent_high = 0; - - cpi->this_frame_target = (cpi->this_frame_target * (100 + (percent_high / 2))) / 100; - + percent_high = (cpi->buffer_level + - cpi->oxcf.optimal_buffer_level) + / one_percent_bits; } + else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) + { + percent_high = (int)((100 * cpi->bits_off_target) + / (cpi->total_byte_count * 8)); + } + + if (percent_high > cpi->oxcf.over_shoot_pct) + percent_high = cpi->oxcf.over_shoot_pct; + else if (percent_high < 0) + percent_high = 0; + + cpi->this_frame_target += (cpi->this_frame_target * + percent_high) / 200; + // Are we allowing control of active_worst_allowed_q according to bufferl level. 
if (cpi->auto_worst_q) @@ -1464,39 +1461,6 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) return Q; } -static int estimate_min_frame_size(VP8_COMP *cpi) -{ - double correction_factor; - int bits_per_mb_at_max_q; - - // This funtion returns a default value for the first few frames untill the correction factor has had time to adapt. - if (cpi->common.current_video_frame < 10) - { - if (cpi->pass == 2) - return (cpi->min_frame_bandwidth); - else - return cpi->per_frame_bandwidth / 3; - } - - /* // Select the appropriate correction factor based upon type of frame. - if ( cpi->common.frame_type == KEY_FRAME ) - correction_factor = cpi->key_frame_rate_correction_factor; - else - { - if ( cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame ) - correction_factor = cpi->gf_rate_correction_factor; - else - correction_factor = cpi->rate_correction_factor; - }*/ - - // We estimate at half the value we get from vp8_bits_per_mb - correction_factor = cpi->rate_correction_factor / 2.0; - - bits_per_mb_at_max_q = (int)(.5 + correction_factor * vp8_bits_per_mb[cpi->common.frame_type][MAXQ]); - - return (bits_per_mb_at_max_q * cpi->common.MBs) >> BPER_MB_NORMBITS; -} - static int estimate_keyframe_frequency(VP8_COMP *cpi) { @@ -1513,8 +1477,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) /* Assume a default of 1 kf every 2 seconds, or the max kf interval, * whichever is smaller. */ + int key_freq = cpi->oxcf.key_freq>0 ? 
cpi->oxcf.key_freq : 1; av_key_frame_frequency = (int)cpi->output_frame_rate * 2; - if (av_key_frame_frequency > cpi->oxcf.key_freq) + + if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq) av_key_frame_frequency = cpi->oxcf.key_freq; cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 54cfab8c5..dfc9bec95 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -298,9 +298,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) #endif vp8_set_speed_features(cpi); - if (cpi->common.simpler_lpf) - cpi->common.filter_type = SIMPLE_LOOPFILTER; - q = (int)pow(vp8_dc_quant(QIndex,0), 1.25); if (q < 8) @@ -2526,7 +2523,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int best_mbmode.uv_mode = 0; best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0; best_mbmode.partitioning = 0; - best_mbmode.dc_diff = 0; vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index ccf0c7f34..1c5923813 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -224,18 +224,9 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) int plane_type; int b; - TOKENEXTRA *start = *t; - TOKENEXTRA *tp = *t; - - x->mode_info_context->mbmi.dc_diff = 1; - - -#if 1 - x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x); if (x->mode_info_context->mbmi.mb_skip_coeff) { - cpi->skip_true_count++; if (!cpi->common.mb_no_coeff_skip) @@ -245,17 +236,11 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) vp8_fix_contexts(x); } - if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) - x->mode_info_context->mbmi.dc_diff = 0; - else - x->mode_info_context->mbmi.dc_diff = 1; - - return; } cpi->skip_false_count++; -#endif + #if 0 
vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts)); #endif @@ -282,42 +267,6 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) A + vp8_block2above[b], L + vp8_block2left[b], cpi); -#if 0 - - if (cpi->common.mb_no_coeff_skip) - { - int skip = 1; - - while ((tp != *t) && skip) - { - skip = (skip && (tp->Token == DCT_EOB_TOKEN)); - tp ++; - } - - if (skip != x->mbmi.mb_skip_coeff) - skip += 0; - - x->mbmi.mb_skip_coeff = skip; - - if (x->mbmi.mb_skip_coeff == 1) - { - x->mbmi.dc_diff = 0; - //redo the coutnts - vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts)); - - *t = start; - cpi->skip_true_count++; - //skip_true_count++; - } - else - { - - cpi->skip_false_count++; - //skip_false_count++; - } - } - -#endif } @@ -500,13 +449,6 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) A + vp8_block2above[24], L + vp8_block2left[24], cpi); plane_type = 0; - - if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) - x->mode_info_context->mbmi.dc_diff = 0; - else - x->mode_info_context->mbmi.dc_diff = 1; - - for (b = 0; b < 16; b++) stuff1st_order_b(x->block + b, t, plane_type, x->frame_type, A + vp8_block2above[b], diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm new file mode 100644 index 000000000..258899eed --- /dev/null +++ b/vp8/encoder/x86/quantize_sse4.asm @@ -0,0 +1,254 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" +%include "asm_enc_offsets.asm" + + +; void vp8_regular_quantize_b_sse4 | arg +; (BLOCK *b, | 0 +; BLOCKD *d) | 1 + +global sym(vp8_regular_quantize_b_sse4) +sym(vp8_regular_quantize_b_sse4): + +%if ABI_IS_32BIT + push rbp + mov rbp, rsp + GET_GOT rbx + push rdi + push rsi + + ALIGN_STACK 16, rax + %define qcoeff 0 ; 32 + %define stack_size 32 + sub rsp, stack_size +%else + %ifidn __OUTPUT_FORMAT__,x64 + SAVE_XMM 8, u + push rdi + push rsi + %endif +%endif + ; end prolog + +%if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d +%else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif +%endif + + mov rax, [rdi + vp8_block_coeff] + mov rcx, [rdi + vp8_block_zbin] + mov rdx, [rdi + vp8_block_round] + movd xmm7, [rdi + vp8_block_zbin_extra] + + ; z + movdqa xmm0, [rax] + movdqa xmm1, [rax + 16] + + ; duplicate zbin_oq_value + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + ; sz + psraw xmm0, 15 + psraw xmm1, 15 + + ; (z ^ sz) + pxor xmm2, xmm0 + pxor xmm3, xmm1 + + ; x = abs(z) + psubw xmm2, xmm0 + psubw xmm3, xmm1 + + ; zbin + movdqa xmm4, [rcx] + movdqa xmm5, [rcx + 16] + + ; *zbin_ptr + zbin_oq_value + paddw xmm4, xmm7 + paddw xmm5, xmm7 + + movdqa xmm6, xmm2 + movdqa xmm7, xmm3 + + ; x - (*zbin_ptr + zbin_oq_value) + psubw xmm6, xmm4 + psubw xmm7, xmm5 + + ; round + movdqa xmm4, [rdx] + movdqa xmm5, [rdx + 16] + + mov rax, [rdi + vp8_block_quant_shift] + mov rcx, [rdi + vp8_block_quant] + mov rdx, [rdi + vp8_block_zrun_zbin_boost] + + ; x + round + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + ; quant + movdqa xmm4, [rcx] + movdqa xmm5, [rcx + 16] + + ; y = x * quant_ptr >> 16 + pmulhw xmm4, xmm2 + pmulhw xmm5, xmm3 + + ; y += x + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + pxor xmm4, xmm4 +%if ABI_IS_32BIT + movdqa [rsp + qcoeff], xmm4 + movdqa [rsp + qcoeff 
+ 16], xmm4 +%else + pxor xmm8, xmm8 +%endif + + ; quant_shift + movdqa xmm5, [rax] + + ; zrun_zbin_boost + mov rax, rdx + +%macro ZIGZAG_LOOP 5 + ; x + pextrw ecx, %4, %2 + + ; if (x >= zbin) + sub cx, WORD PTR[rdx] ; x - zbin + lea rdx, [rdx + 2] ; zbin_boost_ptr++ + jl rq_zigzag_loop_%1 ; x < zbin + + pextrw edi, %3, %2 ; y + + ; downshift by quant_shift[rc] + pextrb ecx, xmm5, %1 ; quant_shift[rc] + sar edi, cl ; also sets Z bit + je rq_zigzag_loop_%1 ; !y +%if ABI_IS_32BIT + mov WORD PTR[rsp + qcoeff + %1 *2], di +%else + pinsrw %5, edi, %2 ; qcoeff[rc] +%endif + mov rdx, rax ; reset to b->zrun_zbin_boost +rq_zigzag_loop_%1: +%endmacro +; in vp8_default_zig_zag1d order: see vp8/common/entropy.c +ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 + + mov rcx, [rsi + vp8_blockd_dequant] + mov rdi, [rsi + vp8_blockd_dqcoeff] + +%if ABI_IS_32BIT + movdqa xmm4, [rsp + qcoeff] + movdqa xmm5, [rsp + qcoeff + 16] +%else + %define xmm5 xmm8 +%endif + + ; y ^ sz + pxor xmm4, xmm0 + pxor xmm5, xmm1 + ; x = (y ^ sz) - sz + psubw xmm4, xmm0 + psubw xmm5, xmm1 + + ; dequant + movdqa xmm0, [rcx] + movdqa xmm1, [rcx + 16] + + mov rcx, [rsi + vp8_blockd_qcoeff] + + pmullw xmm0, xmm4 + pmullw xmm1, xmm5 + + ; store qcoeff + movdqa [rcx], xmm4 + movdqa [rcx + 16], xmm5 + + ; store dqcoeff + movdqa [rdi], xmm0 + movdqa [rdi + 16], xmm1 + + ; select the last value (in zig_zag order) for EOB + pxor xmm6, xmm6 + pcmpeqw xmm4, xmm6 + pcmpeqw xmm5, xmm6 + + 
packsswb xmm4, xmm5 + pshufb xmm4, [GLOBAL(zig_zag1d)] + pmovmskb edx, xmm4 + xor rdi, rdi + mov eax, -1 + xor dx, ax + bsr eax, edx + sub edi, edx + sar edi, 31 + add eax, 1 + and eax, edi + + mov [rsi + vp8_blockd_eob], eax + + ; begin epilog +%if ABI_IS_32BIT + add rsp, stack_size + pop rsp + + pop rsi + pop rdi + RESTORE_GOT + pop rbp +%else + %undef xmm5 + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + RESTORE_XMM + %endif +%endif + + ret + +SECTION_RODATA +align 16 +; vp8/common/entropy.c: vp8_default_zig_zag1d +zig_zag1d: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h index f09358061..bbe475f8c 100644 --- a/vp8/encoder/x86/quantize_x86.h +++ b/vp8/encoder/x86/quantize_x86.h @@ -51,4 +51,17 @@ extern prototype_quantize_block(vp8_fast_quantize_b_ssse3); #endif /* HAVE_SSSE3 */ + +#if HAVE_SSE4_1 +extern prototype_quantize_block(vp8_regular_quantize_b_sse4); + +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_quantize_quantb +#define vp8_quantize_quantb vp8_regular_quantize_b_sse4 + +#endif /* !CONFIG_RUNTIME_CPU_DETECT */ + +#endif /* HAVE_SSE4_1 */ + #endif /* QUANTIZE_X86_H */ diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 47a1788bc..b01319fa4 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -313,6 +313,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; cpi->rtcd.search.full_search = vp8_full_search_sadx8; + + cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse4; } #endif diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index b684ad006..462494c79 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -153,7 +153,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); #endif RANGE_CHECK(cfg, rc_end_usage, 
VPX_VBR, VPX_CQ); - RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100); + RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000); + RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); @@ -307,6 +308,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, } oxcf->target_bandwidth = cfg.rc_target_bitrate; + oxcf->rc_max_intra_bitrate_pct = cfg.rc_max_intra_bitrate_pct; oxcf->best_allowed_q = cfg.rc_min_quantizer; oxcf->worst_allowed_q = cfg.rc_max_quantizer; @@ -314,7 +316,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg.rc_undershoot_pct; - //oxcf->over_shoot_pct = cfg.rc_overshoot_pct; + oxcf->over_shoot_pct = cfg.rc_overshoot_pct; oxcf->maximum_buffer_size = cfg.rc_buf_sz; oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; @@ -360,6 +362,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, printf("key_freq: %d\n", oxcf->key_freq); printf("end_usage: %d\n", oxcf->end_usage); printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct); + printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct); printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level); printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level); printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size); @@ -1105,11 +1108,11 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = {0}, /* rc_twopass_stats_in */ #endif 256, /* rc_target_bandwidth */ - + 0, /* rc_max_intra_bitrate_pct */ 4, /* rc_min_quantizer */ 63, /* rc_max_quantizer */ - 95, /* rc_undershoot_pct */ - 200, /* rc_overshoot_pct */ + 100, /* rc_undershoot_pct */ + 100, /* rc_overshoot_pct */ 6000, /* rc_max_buffer_size */ 4000, /* rc_buffer_initial_size; */ diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index a7b68dba7..c17837164 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -117,6 +117,7 @@ VP8_CX_SRCS-$(HAVE_SSSE3) += 
encoder/x86/variance_ssse3.c VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_impl_ssse3.asm VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm +VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 9c44414b2..6dbce0d56 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -398,6 +398,21 @@ extern "C" { unsigned int rc_target_bitrate; + /*!\brief Max data rate for Intra frames + * + * This value controls additional clamping on the maximum size of a + * keyframe. It is expressed as a percentage of the average + * per-frame bitrate, with the special (and default) value 0 meaning + * unlimited, or no additional clamping beyond the codec's built-in + * algorithm. + * + * For example, to allocate no more than 4.5 frames worth of bitrate + * to a keyframe, set this to 450. + * + */ + unsigned int rc_max_intra_bitrate_pct; + + /* * quantizer settings */ @@ -430,20 +445,28 @@ extern "C" { */ - /*!\brief Rate control undershoot tolerance + /*!\brief Rate control adaptation undershoot control * - * This value, expressed as a percentage of the target bitrate, describes - * the target bitrate for easier frames, allowing bits to be saved for - * harder frames. Set to zero to use the codec default. + * This value, expressed as a percentage of the target bitrate, + * controls the maximum allowed adaptation speed of the codec. + * This factor controls the maximum amount of bits that can + * be subtracted from the target bitrate in order to compensate + * for prior overshoot. + * + * Valid values in the range 0-1000. 
*/ unsigned int rc_undershoot_pct; - /*!\brief Rate control overshoot tolerance + /*!\brief Rate control adaptation overshoot control * - * This value, expressed as a percentage of the target bitrate, describes - * the maximum allowed bitrate for a given frame. Set to zero to use the - * codec default. + * This value, expressed as a percentage of the target bitrate, + * controls the maximum allowed adaptation speed of the codec. + * This factor controls the maximum amount of bits that can + * be added to the target bitrate in order to compensate for + * prior undershoot. + * + * Valid values in the range 0-1000. */ unsigned int rc_overshoot_pct; diff --git a/vpxenc.c b/vpxenc.c index 93ad6f647..14775031b 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -260,6 +260,16 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats) return stats->buf; } +/* Stereo 3D packed frame format */ +typedef enum stereo_format +{ + STEREO_FORMAT_MONO = 0, + STEREO_FORMAT_LEFT_RIGHT = 1, + STEREO_FORMAT_BOTTOM_TOP = 2, + STEREO_FORMAT_TOP_BOTTOM = 3, + STEREO_FORMAT_RIGHT_LEFT = 11 +} stereo_format_t; + enum video_file_type { FILE_TYPE_RAW, @@ -610,7 +620,8 @@ write_webm_seek_info(EbmlGlobal *ebml) static void write_webm_file_header(EbmlGlobal *glob, const vpx_codec_enc_cfg_t *cfg, - const struct vpx_rational *fps) + const struct vpx_rational *fps, + stereo_format_t stereo_fmt) { { EbmlLoc start; @@ -654,6 +665,7 @@ write_webm_file_header(EbmlGlobal *glob, Ebml_StartSubElement(glob, &videoStart, Video); Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); + Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); Ebml_SerializeFloat(glob, FrameRate, frameRate); Ebml_EndSubElement(glob, &videoStart); //Video } @@ -920,6 +932,16 @@ static const arg_def_t width = ARG_DEF("w", "width", 1, "Frame width"); static const arg_def_t height = ARG_DEF("h", "height", 1, "Frame height"); +static const struct arg_enum_list stereo_mode_enum[] = { + {"mono" , 
STEREO_FORMAT_MONO}, + {"left-right", STEREO_FORMAT_LEFT_RIGHT}, + {"bottom-top", STEREO_FORMAT_BOTTOM_TOP}, + {"top-bottom", STEREO_FORMAT_TOP_BOTTOM}, + {"right-left", STEREO_FORMAT_RIGHT_LEFT}, + {NULL, 0} +}; +static const arg_def_t stereo_mode = ARG_DEF_ENUM(NULL, "stereo-mode", 1, + "Stereo 3D video format", stereo_mode_enum); static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1, "Stream timebase (frame duration)"); static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1, @@ -930,7 +952,7 @@ static const arg_def_t lag_in_frames = ARG_DEF(NULL, "lag-in-frames", 1, static const arg_def_t *global_args[] = { &use_yv12, &use_i420, &usage, &threads, &profile, - &width, &height, &timebase, &framerate, &error_resilient, + &width, &height, &stereo_mode, &timebase, &framerate, &error_resilient, &lag_in_frames, NULL }; @@ -966,11 +988,14 @@ static const arg_def_t buf_initial_sz = ARG_DEF(NULL, "buf-initial-sz", 1, "Client initial buffer size (ms)"); static const arg_def_t buf_optimal_sz = ARG_DEF(NULL, "buf-optimal-sz", 1, "Client optimal buffer size (ms)"); +static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1, + "Max I-frame bitrate (pct)"); static const arg_def_t *rc_args[] = { &dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh, &end_usage, &target_bitrate, &min_quantizer, &max_quantizer, &undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz, + &max_intra_rate_pct, NULL }; @@ -1088,7 +1113,6 @@ static void usage_exit() #define ARG_CTRL_CNT_MAX 10 - int main(int argc, const char **argv_) { vpx_codec_ctx_t encoder; @@ -1124,6 +1148,7 @@ int main(int argc, const char **argv_) uint64_t psnr_samples_total = 0; double psnr_totals[4] = {0, 0, 0, 0}; int psnr_count = 0; + stereo_format_t stereo_fmt = STEREO_FORMAT_MONO; exec_name = argv_[0]; ebml.last_pts_ms = -1; @@ -1263,6 +1288,8 @@ int main(int argc, const char **argv_) cfg.g_w = arg_parse_uint(&arg); else if 
(arg_match(&arg, &height, argi)) cfg.g_h = arg_parse_uint(&arg); + else if (arg_match(&arg, &stereo_mode, argi)) + stereo_fmt = arg_parse_enum_or_int(&arg); else if (arg_match(&arg, &timebase, argi)) cfg.g_timebase = arg_parse_rational(&arg); else if (arg_match(&arg, &error_resilient, argi)) @@ -1283,6 +1310,8 @@ int main(int argc, const char **argv_) cfg.rc_end_usage = arg_parse_enum_or_int(&arg); else if (arg_match(&arg, &target_bitrate, argi)) cfg.rc_target_bitrate = arg_parse_uint(&arg); + else if (arg_match(&arg, &max_intra_rate_pct, argi)) + cfg.rc_max_intra_bitrate_pct = arg_parse_uint(&arg); else if (arg_match(&arg, &min_quantizer, argi)) cfg.rc_min_quantizer = arg_parse_uint(&arg); else if (arg_match(&arg, &max_quantizer, argi)) @@ -1565,7 +1594,7 @@ int main(int argc, const char **argv_) if(write_webm) { ebml.stream = outfile; - write_webm_file_header(&ebml, &cfg, &arg_framerate); + write_webm_file_header(&ebml, &cfg, &arg_framerate, stereo_fmt); } else write_ivf_file_header(outfile, &cfg, codec->fourcc, 0);