diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index d8a76d5b5..8c40a18e8 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -545,31 +545,29 @@ void vp8_encode_frame(VP8_COMP *cpi)
     int segment_counts[MAX_MB_SEGMENTS];
     int totalrate;
 
-    if (cm->frame_type != KEY_FRAME)
+    // Functions setup for all frame types so we can use MC in AltRef
+    if (cm->mcomp_filter_type == SIXTAP)
     {
-        if (cm->mcomp_filter_type == SIXTAP)
-        {
-            xd->subpixel_predict      = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap4x4);
-            xd->subpixel_predict8x4   = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x4);
-            xd->subpixel_predict8x8   = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x8);
-            xd->subpixel_predict16x16 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap16x16);
-        }
-        else
-        {
-            xd->subpixel_predict      = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear4x4);
-            xd->subpixel_predict8x4   = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x4);
-            xd->subpixel_predict8x8   = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x8);
-            xd->subpixel_predict16x16 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear16x16);
-        }
+        xd->subpixel_predict      = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap4x4);
+        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap8x4);
+        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap8x8);
+        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap16x16);
+    }
+    else
+    {
+        xd->subpixel_predict      = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear4x4);
+        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear8x4);
+        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear8x8);
+        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear16x16);
     }
 
-    //else  // Key Frame
-    //{
-    // For key frames make sure the intra ref frame probability value
-    // is set to "all intra"
-    //cpi->prob_intra_coded = 255;
-    //}
-
     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;    // Point to base of GF active flags data structure
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index a65bce6e1..684ad9b12 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -258,7 +258,7 @@ void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
     vpx_codec_pkt_list_add(pktlist, &pkt);
 
 // TEMP debug code
-#ifdef OUTPUT_FPF
+#if OUTPUT_FPF
     {
         FILE *fpfile;
         fpfile = fopen("firstpass.stt", "a");
@@ -369,50 +369,33 @@ void vp8_fpmm_reset_pos(VP8_COMP *cpi, int target_pos)
 
 void vp8_advance_fpmm(VP8_COMP *cpi, int count)
 {
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     fseek(cpi->fp_motion_mapfile, (int)(count * cpi->common.MBs), SEEK_CUR);
 #endif
 }
 
-void vp8_input_fpmm(VP8_COMP *cpi, int count)
+void vp8_input_fpmm(VP8_COMP *cpi)
 {
-#ifdef FIRSTPASS_MM
-
-    unsigned char *tmp_motion_map;
-    int i, j;
+#if FIRSTPASS_MM
+    int MBs = cpi->common.MBs;
+    int max_frames = cpi->active_arnr_frames;
 
     if (!cpi->fp_motion_mapfile)
         return;                 // Error
 
-    // Create the first pass motion map structure and set to 0
-    CHECK_MEM_ERROR(tmp_motion_map, vpx_calloc(cpi->common.MBs, 1));
-
-    // Reset the state of the global map
-    vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs);
-
-    // Read the specified number of frame maps and set the global map to the highest value seen for each mb.
-    for (i = 0; i < count; i++)
+    // Read the specified number of frame motion maps
+    if (fread(cpi->fp_motion_map, 1,
+              max_frames * MBs,
+              cpi->fp_motion_mapfile) != max_frames*MBs)
     {
-        if (fread(tmp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile) == cpi->common.MBs)
-        {
-            for (j = 0; j < cpi->common.MBs; j++)
-            {
-                if (tmp_motion_map[j] > 1)
-                    cpi->fp_motion_map[j] += 5;   // Intra is flagged
-                else
-                    cpi->fp_motion_map[j] += tmp_motion_map[j];
-            }
-        }
-        else
-            break;  // Read error
-
+        // Read error
+        return;
     }
 
-    if (tmp_motion_map != 0)
-        vpx_free(tmp_motion_map);
+    // Flag the use of weights in the temporal filter
+    cpi->use_weighted_temporal_filter = 1;
 
 #endif
-
 }
 
 void vp8_init_first_pass(VP8_COMP *cpi)
@@ -438,7 +421,7 @@ void vp8_end_first_pass(VP8_COMP *cpi)
 {
     vp8_output_stats(cpi->output_pkt_list, &cpi->total_stats);
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
 
     if (cpi->fp_motion_mapfile)
         fclose(cpi->fp_motion_mapfile);
 
@@ -603,6 +586,8 @@ void vp8_first_pass(VP8_COMP *cpi)
         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
         {
             int this_error;
+            int zero_error;
+            int zz_to_best_ratio;
             int gf_motion_error = INT_MAX;
             int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
 
@@ -624,7 +609,7 @@
             intra_error += this_error;
 
             // Indicate default assumption of intra in the motion map
-            *fp_motion_map_ptr = 2;
+            *fp_motion_map_ptr = 0;
 
             // Set up limit values for motion vectors to prevent them extending outside the UMV borders
             x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
@@ -646,6 +631,9 @@
                 d->bmi.mv.as_mv.row = 0;
                 d->bmi.mv.as_mv.col = 0;
 
+                // Save (0,0) error for later use
+                zero_error = motion_error;
+
                 // Test last reference frame using the previous best mv as the
                 // starting point (best reference) for the search
                 vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
@@ -719,8 +707,6 @@
                 {
                     mvcount++;
 
-                    *fp_motion_map_ptr = 1;
-
                     // Does the Row vector point inwards or outwards
                     if (mb_row < cm->mb_rows / 2)
                     {
@@ -752,12 +738,30 @@
                     else if (d->bmi.mv.as_mv.col < 0)
                         sum_in_vectors--;
                 }
+
+                // Compute how close (0,0) predictor is to best
+                // predictor in terms of their prediction error
+                zz_to_best_ratio = (10*zero_error + this_error/2)
+                                    / (this_error+!this_error);
+
+                if ((zero_error < 50000) &&
+                    (zz_to_best_ratio <= 11) )
+                    *fp_motion_map_ptr = 1;
+                else
+                    *fp_motion_map_ptr = 0;
             }
             else
-                *fp_motion_map_ptr = 0; // 0,0 mv was best
+            {
+                // 0,0 mv was best
+                if( zero_error<50000 )
+                    *fp_motion_map_ptr = 2;
+                else
+                    *fp_motion_map_ptr = 1;
+            }
         }
         else
         {
+            // Intra was best
             best_ref_mv.row = 0;
             best_ref_mv.col = 0;
         }
@@ -839,7 +843,7 @@
     vp8_output_stats(cpi->output_pkt_list, &cpi->this_frame_stats);
     vp8_accumulate_stats(&cpi->total_stats, &fps);
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile);
 #endif
 }
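Note: with the changes above, each first-pass motion map entry becomes a three-level weight rather than an intra/skip flag. Where the motion search ran, the entry is derived from the (0,0) prediction error and how close it is to the best error; macroblocks where intra prediction won keep the default 0. A minimal sketch of the labeling rule, with the helper name and the zero_error/best_error parameters introduced here for illustration only:

    /* Sketch: first-pass motion-map label for one macroblock. */
    static int motion_map_label(int zero_error, int best_error, int zero_mv_was_best)
    {
        if (zero_mv_was_best)
            return (zero_error < 50000) ? 2 : 1;
        else
        {
            /* 10x the (0,0)-to-best error ratio, rounded; +!best_error guards /0 */
            int zz_to_best_ratio = (10 * zero_error + best_error / 2)
                                   / (best_error + !best_error);

            return ((zero_error < 50000) && (zz_to_best_ratio <= 11)) ? 1 : 0;
        }
    }

A ratio of 11 or less means the (0,0) error is within roughly 1.1x of the best motion-compensated error, i.e. the block is nearly static and a good temporal-filter candidate.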
@@ -1180,7 +1184,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
 
     }
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     cpi->fp_motion_mapfile = 0;
     cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "rb");
 #endif
@@ -1189,7 +1193,7 @@
 
 void vp8_end_second_pass(VP8_COMP *cpi)
 {
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
 
     if (cpi->fp_motion_mapfile)
         fclose(cpi->fp_motion_mapfile);
 
@@ -1230,7 +1234,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     int max_bits = frame_max_bits(cpi);    // Max for a single frame
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     int fpmm_pos;
 #endif
 
@@ -1239,7 +1243,7 @@
 
     vp8_clear_system_state();  //__asm emms;
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     fpmm_pos = vp8_fpmm_get_pos(cpi);
 #endif
 
@@ -1452,6 +1456,11 @@
         // Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames.
         if (tmp_q < cpi->worst_quality)
         {
+            int half_gf_int;
+            int frames_after_arf;
+            int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
+            int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
+
             cpi->source_alt_ref_pending = TRUE;
 
             // For alt ref frames the error score for the end frame of the group (the alt ref frame) should not contribute to the group total and hence
@@ -1462,20 +1471,63 @@
             // The future frame itself is part of the next group
             cpi->baseline_gf_interval = i - 1;
 
-#ifdef FIRSTPASS_MM
-            // Read through the motion map to load up the entry for the ARF
-            {
-                int j;
+            // Define the arnr filter width for this group of frames:
+            // We only filter frames that lie within a distance of half
+            // the GF interval from the ARF frame. We also have to trap
+            // cases where the filter extends beyond the end of clip.
+            // Note: this_frame->frame has been updated in the loop
+            // so it now points at the ARF frame.
+            half_gf_int = cpi->baseline_gf_interval >> 1;
+            frames_after_arf = cpi->total_stats.count - this_frame->frame - 1;
 
-                // Advance to the region of interest
-                // Current default 2 frames before to 2 frames after the ARF frame itsef
+            switch (cpi->oxcf.arnr_type)
+            {
+            case 1: // Backward filter
+                frames_fwd = 0;
+                if (frames_bwd > half_gf_int)
+                    frames_bwd = half_gf_int;
+                break;
+
+            case 2: // Forward filter
+                if (frames_fwd > half_gf_int)
+                    frames_fwd = half_gf_int;
+                if (frames_fwd > frames_after_arf)
+                    frames_fwd = frames_after_arf;
+                frames_bwd = 0;
+                break;
+
+            case 3: // Centered filter
+            default:
+                frames_fwd >>= 1;
+                if (frames_fwd > frames_after_arf)
+                    frames_fwd = frames_after_arf;
+                if (frames_fwd > half_gf_int)
+                    frames_fwd = half_gf_int;
+
+                frames_bwd = frames_fwd;
+
+                // For even length filter there is one more frame backward
+                // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+                if (frames_bwd < half_gf_int)
+                    frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1;
+                break;
+            }
+
+            cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
+
+#if FIRSTPASS_MM
+            {
+                // Advance to & read in the motion map for those frames
+                // to be considered for filtering based on the position
+                // of the ARF
                 vp8_fpmm_reset_pos(cpi, cpi->fpmm_pos);
 
-                for (j = 0; j < cpi->baseline_gf_interval - 2; j++)
-                    vp8_advance_fpmm(cpi, 1);
+                // Position at the 'earliest' frame to be filtered
+                vp8_advance_fpmm(cpi,
+                    cpi->baseline_gf_interval - frames_bwd);
 
                 // Read / create a motion map for the region of interest
-                vp8_input_fpmm(cpi, 5);
+                vp8_input_fpmm(cpi);
             }
 #endif
         }
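For reference, a worked pass through the centered case above (input values assumed): with arnr_max_frames = 6 and no clamping by half_gf_int or frames_after_arf, frames_fwd = 5 >> 1 = 2, frames_bwd = 2 + ((6+1) & 0x1) = 3, so active_arnr_frames = 3 + 1 + 2 = 6 (bbbAff); with arnr_max_frames = 7 the parity term is 0 and the window is 3 + 1 + 3 = 7 (bbbAfff). The same arithmetic as a standalone sketch:

    /* Sketch: centered ARNR window (case 3 above); helper name hypothetical. */
    static int centered_arnr_window(int arnr_max_frames, int half_gf_int,
                                    int frames_after_arf)
    {
        int frames_fwd = (arnr_max_frames - 1) >> 1;
        int frames_bwd;

        if (frames_fwd > frames_after_arf)
            frames_fwd = frames_after_arf;
        if (frames_fwd > half_gf_int)
            frames_fwd = half_gf_int;

        frames_bwd = frames_fwd;

        /* Even filter lengths take the extra frame from the past */
        if (frames_bwd < half_gf_int)
            frames_bwd += (arnr_max_frames + 1) & 0x1;

        return frames_bwd + 1 + frames_fwd;   /* active_arnr_frames */
    }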
@@ -1713,7 +1765,7 @@
         reset_fpf_position(cpi, start_pos);
     }
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     // Reset the First pass motion map file position
     vp8_fpmm_reset_pos(cpi, fpmm_pos);
 #endif
@@ -1798,10 +1850,13 @@ void vp8_second_pass(VP8_COMP *cpi)
     if (EOF == vp8_input_stats(cpi, &this_frame))
         return;
 
-#ifdef FIRSTPASS_MM
-    vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs);
+#if FIRSTPASS_MM
+    vpx_memset(cpi->fp_motion_map, 0,
+               cpi->oxcf.arnr_max_frames*cpi->common.MBs);
     cpi->fpmm_pos = vp8_fpmm_get_pos(cpi);
-    vp8_advance_fpmm(cpi, 1);        // Read this frame's first pass motion map
+
+    // Step over this frame's first pass motion map
+    vp8_advance_fpmm(cpi, 1);
 #endif
 
     this_frame_error = this_frame.ssim_weighted_pred_err;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ed112b4d7..ce15acaac 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -43,6 +43,9 @@
 #define RTCD(x) NULL
 #endif
 
+#define ALT_REF_MC_ENABLED 1        // dis/enable MC in AltRef filtering
+#define ALT_REF_SUBPEL_ENABLED 1    // dis/enable subpel in MC AltRef filtering
+
 extern void vp8cx_init_mv_bits_sadcost();
 extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi);
 extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val);
@@ -1662,13 +1665,16 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 #endif
 
 #if VP8_TEMPORAL_ALT_REF
+
+    cpi->use_weighted_temporal_filter = 0;
+
     {
         int i;
 
         cpi->fixed_divide[0] = 0;
 
-        for (i = 1; i < 255; i++)
-            cpi->fixed_divide[i] = 0x10000 / i;
+        for (i = 1; i < 512; i++)
+            cpi->fixed_divide[i] = 0x80000 / i;
     }
 #endif
 }
@@ -2042,7 +2048,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->active_map_enabled = 0;
 
     // Create the first pass motion map structure and set to 0
-    CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(cpi->common.MBs, 1));
+    // Allocate space for maximum of 15 buffers
+    CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
 
 #if 0
     // Experimental code for lagged and one pass
@@ -3290,97 +3297,479 @@ static int modifier_lut[7][19] =
     {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
 };
 #endif
-static void vp8cx_temp_blur1_c
+static void build_predictors_mb
 (
-    VP8_COMP *cpi,
-    unsigned char **frames,
-    int frame_count,
-    unsigned char *src,
-    unsigned char *dst,
-    int width,
+    MACROBLOCKD *x,
+    unsigned char *y_mb_ptr,
+    unsigned char *u_mb_ptr,
+    unsigned char *v_mb_ptr,
     int stride,
-    int height,
-    int strength,
-    int *fixed_divide,
-    unsigned char *motion_map_ptr,
-    unsigned char block_size
+    int mv_row,
+    int mv_col,
+    unsigned char *pred
+)
+{
+    int offset;
+    unsigned char *yptr, *uptr, *vptr;
+
+    // Y
+    yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+
+    if ((mv_row | mv_col) & 7)
+    {
+//        vp8_sixtap_predict16x16_c(yptr, stride,
+//                                  mv_col & 7, mv_row & 7, &pred[0], 16);
+        x->subpixel_predict16x16(yptr, stride,
+                                 mv_col & 7, mv_row & 7, &pred[0], 16);
+    }
+    else
+    {
+        //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
+        RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
+    }
+
+    // U & V
+    mv_row >>= 1;
+    mv_col >>= 1;
+    stride >>= 1;
+    offset = (mv_row >> 3) * stride + (mv_col >> 3);
+    uptr = u_mb_ptr + offset;
+    vptr = v_mb_ptr + offset;
+
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict8x8(uptr, stride,
+                               mv_col & 7, mv_row & 7, &pred[256], 8);
+        x->subpixel_predict8x8(vptr, stride,
+                               mv_col & 7, mv_row & 7, &pred[320], 8);
+    }
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
+    }
+}
+static void apply_temporal_filter
+(
+    unsigned char *frame1,
+    unsigned int stride,
+    unsigned char *frame2,
+    unsigned int block_size,
+    int strength,
+    int filter_weight,
+    int *accumulator,
+    int *count
 )
 {
-    int byte = 0;           // Buffer offset for current pixel being filtered
-    int frame = 0;
-    int modifier = 0;
     int i, j, k;
-    int block_ofset;
-    int cols;
-    unsigned char Shift = (block_size == 16) ? 4 : 3;
+    int modifier;
+    int byte = 0;
+
 #if USE_FILTER_LUT
     int *lut = modifier_lut[strength];
 #endif
-    cols = cpi->common.mb_cols;
-
-    for (i = 0; i < height; i++)
+    for (i = 0,k = 0; i < block_size; i++)
     {
-        block_ofset = (i >> Shift) * cols;
-
-        for (j = 0; j < cols; j ++)
+        for (j = 0; j < block_size; j++, k++)
         {
-            if (motion_map_ptr[block_ofset] > 2)
-            {
-                vpx_memcpy(&dst[byte], &src[byte], block_size);
-                byte += block_size;
-            }
+
+            int src_byte = frame1[byte];
+            int pixel_value = *frame2++;
+
+#if USE_FILTER_LUT
+            // LUT implementation --
+            // improves precision of filter
+            modifier = abs(src_byte-pixel_value);
+            modifier = modifier>18 ? 0 : lut[modifier];
+#else
+            modifier   = src_byte;
+            modifier  -= pixel_value;
+            modifier  *= modifier;
+            modifier >>= strength;
+            modifier  *= 3;
+
+            if (modifier > 16)
+                modifier = 16;
+
+            modifier = 16 - modifier;
+#endif
+            modifier *= filter_weight;
+
+            count[k] += modifier;
+            accumulator[k] += modifier * pixel_value;
+
+            byte++;
+        }
+
+        byte += stride - block_size;
+    }
+}
+
+#if ALT_REF_MC_ENABLED
+static int dummy_cost[2*mv_max+1];
+
+static int find_matching_mb
+(
+    VP8_COMP *cpi,
+    YV12_BUFFER_CONFIG *arf_frame,
+    YV12_BUFFER_CONFIG *frame_ptr,
+    int mb_offset,
+    int error_thresh
+)
+{
+    MACROBLOCK *x = &cpi->mb;
+    int thissme;
+    int step_param;
+    int further_steps;
+    int n = 0;
+    int sadpb = x->sadperbit16;
+    int bestsme = INT_MAX;
+    int num00 = 0;
+
+    BLOCK *b = &x->block[0];
+    BLOCKD *d = &x->e_mbd.block[0];
+    MV best_ref_mv1 = {0,0};
+
+    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+
+    // Save input state
+    unsigned char **base_src = b->base_src;
+    int src = b->src;
+    int src_stride = b->src_stride;
+    unsigned char **base_pre = d->base_pre;
+    int pre = d->pre;
+    int pre_stride = d->pre_stride;
+
+    // Setup frame pointers
+    b->base_src = &arf_frame->y_buffer;
+    b->src_stride = arf_frame->y_stride;
+    b->src = mb_offset;
+
+    d->base_pre = &frame_ptr->y_buffer;
+    d->pre_stride = frame_ptr->y_stride;
+    d->pre = mb_offset;
+
+    // Further step/diamond searches as necessary
+    if (cpi->Speed < 8)
+    {
+        step_param = cpi->sf.first_step +
+                     ((cpi->Speed > 5) ? 1 : 0);
+        further_steps =
+            (cpi->sf.max_step_search_steps - 1)-step_param;
+    }
+    else
+    {
+        step_param = cpi->sf.first_step + 2;
+        further_steps = 0;
+    }
+
+    if (1/*cpi->sf.search_method == HEX*/)
+    {
+        // TODO Check that the 16x16 vf & sdf are selected here
+        bestsme = vp8_hex_search(x, b, d,
+                                 &best_ref_mv1, &d->bmi.mv.as_mv,
+                                 step_param,
+                                 sadpb/*x->errorperbit*/,
+                                 &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+                                 mvsadcost, mvcost);
+    }
+    else
+    {
+        int mv_x, mv_y;
+
+        bestsme = cpi->diamond_search_sad(x, b, d,
+                                          &best_ref_mv1, &d->bmi.mv.as_mv,
+                                          step_param,
+                                          sadpb / 2/*x->errorperbit*/,
+                                          &num00, &cpi->fn_ptr,
+                                          mvsadcost, mvcost); //sadpb < 9
+
+        // Further step/diamond searches as necessary
+        n = 0;
+        //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+        n = num00;
+        num00 = 0;
+
+        while (n < further_steps)
+        {
+            n++;
+
+            if (num00)
+                num00--;
             else
             {
-                for (k = 0; k < block_size; k++)
+                thissme = cpi->diamond_search_sad(x, b, d,
+                                                  &best_ref_mv1, &d->bmi.mv.as_mv,
+                                                  step_param + n,
+                                                  sadpb / 4/*x->errorperbit*/,
+                                                  &num00, &cpi->fn_ptr,
+                                                  mvsadcost, mvcost); //sadpb = 9
+
+                if (thissme < bestsme)
                 {
-                    int accumulator = 0;
-                    int count = 0;
-                    int src_byte = src[byte];
+                    bestsme = thissme;
+                    mv_y = d->bmi.mv.as_mv.row;
+                    mv_x = d->bmi.mv.as_mv.col;
+                }
+                else
+                {
+                    d->bmi.mv.as_mv.row = mv_y;
+                    d->bmi.mv.as_mv.col = mv_x;
+                }
+            }
+        }
+    }
 
-                    for (frame = 0; frame < frame_count; frame++)
-                    {
-                        // get current frame pixel value
-                        int pixel_value = frames[frame][byte];
-#if USE_FILTER_LUT
-                        // LUT implementation --
-                        // improves precision of filter
-                        modifier = abs(src_byte-pixel_value);
-                        modifier = modifier>18 ? 0 : lut[modifier];
-#else
-                        modifier = src_byte;
-                        modifier -= pixel_value;
-                        modifier *= modifier;
-                        modifier >>= strength;
-                        modifier *= 3;
-
-                        if (modifier > 16)
-                            modifier = 16;
-
-                        modifier = 16 - modifier;
+#if ALT_REF_SUBPEL_ENABLED
+    // Try sub-pixel MC?
+    //if (bestsme > error_thresh && bestsme < INT_MAX)
+    {
+        bestsme = cpi->find_fractional_mv_step(x, b, d,
+                                               &d->bmi.mv.as_mv, &best_ref_mv1,
+                                               x->errorperbit, cpi->fn_ptr.svf,
+                                               cpi->fn_ptr.vf, cpi->mb.mvcost);
+    }
 #endif
-                        accumulator += modifier * pixel_value;
-                        count += modifier;
+
+    // Restore input state
+    b->base_src = base_src;
+    b->src = src;
+    b->src_stride = src_stride;
+    d->base_pre = base_pre;
+    d->pre = pre;
+    d->pre_stride = pre_stride;
+
+    return bestsme;
+}
+#endif
+
+static void vp8cx_temp_blur1_c
+(
+    VP8_COMP *cpi,
+    int frame_count,
+    int alt_ref_index,
+    int strength
+)
+{
+    int byte;
+    int frame;
+    int mb_col, mb_row;
+    unsigned int filter_weight[MAX_LAG_BUFFERS];
+    unsigned char *mm_ptr = cpi->fp_motion_map;
+    int cols = cpi->common.mb_cols;
+    int rows = cpi->common.mb_rows;
+    int MBs = cpi->common.MBs;
+    int mb_y_offset = 0;
+    int mb_uv_offset = 0;
+    unsigned int accumulator[384];
+    unsigned int count[384];
+    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
+    unsigned char *dst1, *dst2;
+    DECLARE_ALIGNED(16, unsigned char, predictor[384]);
+
+    // Save input state
+    unsigned char *y_buffer = mbd->pre.y_buffer;
+    unsigned char *u_buffer = mbd->pre.u_buffer;
+    unsigned char *v_buffer = mbd->pre.v_buffer;
+
+    if (!cpi->use_weighted_temporal_filter)
+    {
+        // Temporal filtering is unweighted
+        for (frame = 0; frame < frame_count; frame++)
+            filter_weight[frame] = 1;
+    }
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+#if ALT_REF_MC_ENABLED
+        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
+        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+                                + (VP8BORDERINPIXELS - 19);
+#endif
+
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+            int i, j, k, w;
+            int weight_cap;
+            int stride;
+
+            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
+            vpx_memset(count, 0, 384*sizeof(unsigned int));
+
+#if ALT_REF_MC_ENABLED
+            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
+            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+                                    + (VP8BORDERINPIXELS - 19);
+#endif
+
+            // Read & process macroblock weights from motion map
+            if (cpi->use_weighted_temporal_filter)
+            {
+                weight_cap = 2;
+
+                for (frame = alt_ref_index-1; frame >= 0; frame--)
+                {
+                    w = *(mm_ptr + (frame+1)*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+                filter_weight[alt_ref_index] = 2;
+
+                weight_cap = 2;
+
+                for (frame = alt_ref_index+1; frame < frame_count; frame++)
+                {
+                    w = *(mm_ptr + frame*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+            }
+
+            for (frame = 0; frame < frame_count; frame++)
+            {
+                int err;
+
+                if (cpi->frames[frame] == NULL)
+                    continue;
+
+                mbd->block[0].bmi.mv.as_mv.row = 0;
+                mbd->block[0].bmi.mv.as_mv.col = 0;
+
+#if ALT_REF_MC_ENABLED
+                //if (filter_weight[frame] == 0)
+                {
+#define THRESH_LOW   10000
+#define THRESH_HIGH  20000
+
+                    // Correlation has been lost try MC
+                    err = find_matching_mb ( cpi,
+                                             cpi->frames[alt_ref_index],
+                                             cpi->frames[frame],
+                                             mb_y_offset,
+                                             THRESH_LOW );
+
+                    if (filter_weight[frame] < 2)
+                    {
+                        // Set weight depending on error
+                        filter_weight[frame] = err<THRESH_LOW
+                                               ? 2 : err<THRESH_HIGH ? 1 : 0;
                     }
+                }
+#endif
+                if (filter_weight[frame] != 0)
+                {
+                    // Construct the predictors
+                    build_predictors_mb ( mbd,
+                              cpi->frames[frame]->y_buffer + mb_y_offset,
+                              cpi->frames[frame]->u_buffer + mb_uv_offset,
+                              cpi->frames[frame]->v_buffer + mb_uv_offset,
+                              cpi->frames[frame]->y_stride,
+                              mbd->block[0].bmi.mv.as_mv.row,
+                              mbd->block[0].bmi.mv.as_mv.col,
+                              predictor );
 
-                    accumulator += (count >> 1);
-                    accumulator *= fixed_divide[count];
-                    accumulator >>= 16;
+                    // Apply the filter (YUV)
+                    apply_temporal_filter ( f->y_buffer + mb_y_offset,
+                                            f->y_stride,
+                                            predictor,
+                                            16,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator,
+                                            count );
 
-                    dst[byte] = accumulator;
+                    apply_temporal_filter ( f->u_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 256,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 256,
+                                            count + 256 );
+
+                    apply_temporal_filter ( f->v_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 320,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 320,
+                                            count + 320 );
+                }
+            }
+
+            // Normalize filter output to produce AltRef frame
+            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
+            byte = mb_y_offset;
+            for (i = 0,k = 0; i < 16; i++)
+            {
+                for (j = 0; j < 16; j++, k++)
+                {
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+
+                    dst1[byte] = (unsigned char)pval;
 
                     // move to next pixel
                     byte++;
                 }
+
+                byte += stride - 16;
             }
 
-            block_ofset++;
+            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
+            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
+            byte = mb_uv_offset;
+            for (i = 0,k = 256; i < 8; i++)
+            {
+                for (j = 0; j < 8; j++, k++)
+                {
+                    int m=k+64;
+
+                    // U
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+                    dst1[byte] = (unsigned char)pval;
+
+                    // V
+                    pval = accumulator[m] + (count[m] >> 1);
+                    pval *= cpi->fixed_divide[count[m]];
+                    pval >>= 19;
+                    dst2[byte] = (unsigned char)pval;
+
+                    // move to next pixel
+                    byte++;
+                }
+
+                byte += stride - 8;
+            }
+
+            mm_ptr++;
+            mb_y_offset += 16;
+            mb_uv_offset += 8;
         }
 
-        // Step byte on over the UMV border to the start of the next line
-        byte += stride - width;
+        mb_y_offset += 16*f->y_stride-f->y_width;
+        mb_uv_offset += 8*f->uv_stride-f->uv_width;
     }
+
+    // Restore input state
+    mbd->pre.y_buffer = y_buffer;
+    mbd->pre.u_buffer = u_buffer;
+    mbd->pre.v_buffer = v_buffer;
 }
 
 static void vp8cx_temp_filter_c
 (
     VP8_COMP *cpi
 )
 {
-    YV12_BUFFER_CONFIG *temp_source_buffer;
-    int *fixed_divide = cpi->fixed_divide;
-
     int frame = 0;
-    int max_frames = 11;
 
     int num_frames_backward = 0;
     int num_frames_forward = 0;
     int frames_to_blur_backward = 0;
     int frames_to_blur_forward = 0;
     int frames_to_blur = 0;
     int start_frame = 0;
+    unsigned int filtered = 0;
 
     int strength = cpi->oxcf.arnr_strength;
 
     int blur_type = cpi->oxcf.arnr_type;
 
-    int new_max_frames = cpi->oxcf.arnr_max_frames;
-
-    if (new_max_frames > 0)
-        max_frames = new_max_frames;
+    int max_frames = cpi->active_arnr_frames;
 
     num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
@@ -3455,8 +3838,9 @@
     if (frames_to_blur_backward > frames_to_blur_forward)
         frames_to_blur_backward = frames_to_blur_forward;
 
-    if (frames_to_blur_forward > (max_frames / 2))
-        frames_to_blur_forward = (max_frames / 2);
+    // When max_frames is even we have 1 more frame backward than forward
+    if (frames_to_blur_forward > (max_frames - 1) / 2)
+        frames_to_blur_forward = ((max_frames - 1) / 2);
 
     if (frames_to_blur_backward > (max_frames / 2))
         frames_to_blur_backward = (max_frames / 2);
@@ -3488,7 +3872,8 @@
         break;
     }
 
-    start_frame = (cpi->last_alt_ref_sei + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
+    start_frame = (cpi->last_alt_ref_sei +
+                   frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
 
 #ifdef DEBUGFWG
     // DEBUG FWG
     , start_frame);
 #endif
 
+    // Setup frame pointers, NULL indicates frame not included in filter
+    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
     for (frame = 0; frame < frames_to_blur; frame++)
     {
         int which_buffer = start_frame - frame;
 
         if (which_buffer < 0)
             which_buffer += cpi->oxcf.lag_in_frames;
 
-        cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.y_buffer;
+        cpi->frames[frames_to_blur-1-frame]
+            = &cpi->src_buffer[which_buffer].source_buffer;
     }
 
-    temp_source_buffer = &cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer;
-
-    // Blur Y
-    vp8cx_temp_blur1_c(
+    vp8cx_temp_blur1_c (
         cpi,
-        cpi->frames,
         frames_to_blur,
-        temp_source_buffer->y_buffer,  // cpi->Source->y_buffer,
-        cpi->alt_ref_buffer.source_buffer.y_buffer,  // cpi->Source->y_buffer,
-        temp_source_buffer->y_width,
-        temp_source_buffer->y_stride,
-        temp_source_buffer->y_height,
-        //temp_source_buffer->y_height * temp_source_buffer->y_stride,
-        strength,
-        fixed_divide,
-        cpi->fp_motion_map, 16);
-
-    for (frame = 0; frame < frames_to_blur; frame++)
-    {
-        int which_buffer = start_frame - frame;
-
-        if (which_buffer < 0)
-            which_buffer += cpi->oxcf.lag_in_frames;
-
-        cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.u_buffer;
-    }
-
-    // Blur U
-    vp8cx_temp_blur1_c(
-        cpi,
-        cpi->frames,
-        frames_to_blur,
-        temp_source_buffer->u_buffer,
-        cpi->alt_ref_buffer.source_buffer.u_buffer,  // cpi->Source->u_buffer,
-        temp_source_buffer->uv_width,
-        temp_source_buffer->uv_stride,
-        temp_source_buffer->uv_height,
-        //temp_source_buffer->uv_height * temp_source_buffer->uv_stride,
-        strength,
-        fixed_divide,
-        cpi->fp_motion_map, 8);
-
-    for (frame = 0; frame < frames_to_blur; frame++)
-    {
-        int which_buffer = start_frame - frame;
-
-        if (which_buffer < 0)
-            which_buffer += cpi->oxcf.lag_in_frames;
-
-        cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.v_buffer;
-    }
-
-    // Blur V
-    vp8cx_temp_blur1_c(
-        cpi,
-        cpi->frames,
-        frames_to_blur,
-        temp_source_buffer->v_buffer,
-        cpi->alt_ref_buffer.source_buffer.v_buffer,  // cpi->Source->v_buffer,
-        temp_source_buffer->uv_width,
-        temp_source_buffer->uv_stride,
-        //temp_source_buffer->uv_height * temp_source_buffer->uv_stride,
-        temp_source_buffer->uv_height,
-        strength,
-        fixed_divide,
-        cpi->fp_motion_map, 8);
+        frames_to_blur_backward,
+        strength );
 }
 #endif
 
-static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags)
+static void encode_frame_to_data_rate
+(
+    VP8_COMP *cpi,
+    unsigned long *size,
+    unsigned char *dest,
+    unsigned int *frame_flags
+)
 {
     int Q;
     int frame_over_shoot_limit;
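The normalization in vp8cx_temp_blur1_c leans on the reciprocal table set up in vp8_init_config() above: fixed_divide[i] = 0x80000/i is a Q19 reciprocal, so acc * fixed_divide[cnt] >> 19 approximates acc/cnt, and adding cnt/2 first makes it a rounded division. The table grows to 512 entries because per-pixel modifiers reach 16, per-frame weights reach 2, and up to 15 frames may contribute (the 15-frame bound is my inference from the fp_motion_map allocation above), bounding cnt by 15*2*16 = 480. A self-contained check of the identity for one representative value:

    #include <assert.h>

    /* Sketch: Q19 reciprocal table as initialized in the patch. */
    static unsigned int fixed_divide[512];

    int main(void)
    {
        unsigned int i, acc = 12345, cnt = 480;   /* worst-case count */
        unsigned int pval;

        fixed_divide[0] = 0;
        for (i = 1; i < 512; i++)
            fixed_divide[i] = 0x80000 / i;        /* 2^19 / i */

        pval = (acc + (cnt >> 1)) * fixed_divide[cnt] >> 19;
        assert(pval == (acc + cnt / 2) / cnt);    /* rounded acc/cnt */
        return 0;
    }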
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 211f65912..0ea6a3b47 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -378,6 +378,7 @@ typedef struct
     int max_gf_interval;
     int baseline_gf_interval;
     int gf_decay_rate;
+    int active_arnr_frames;             // <= cpi->oxcf.arnr_max_frames
 
     INT64 key_frame_count;
     INT64 tot_key_frame_bits;
@@ -616,9 +617,11 @@ typedef struct
 #endif
 #if VP8_TEMPORAL_ALT_REF
     SOURCE_SAMPLE alt_ref_buffer;
-    unsigned char *frames[MAX_LAG_BUFFERS];
-    int fixed_divide[255];
+    YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
+    int fixed_divide[512];
 #endif
+    // Flag to indicate temporal filter method
+    int use_weighted_temporal_filter;
 
 #if CONFIG_PSNR
     int count;
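Finally, the per-pixel arithmetic in apply_temporal_filter() reduces, in the non-LUT branch, to a blend weight of filter_weight * (16 - min(16, 3*(d*d >> strength))) for a pixel differing by d from the alt-ref source. Worked through with strength = 6: d = 8 gives 3*(64 >> 6) = 3, i.e. a weight of 13 per unit of filter_weight, while d = 20 gives 3*(400 >> 6) = 18, clamped to 16, so the pixel contributes nothing. A standalone sketch of that branch (helper name mine):

    /* Sketch: per-pixel blend weight, non-LUT branch of apply_temporal_filter. */
    static int pixel_filter_weight(int src_byte, int pixel_value,
                                   int strength, int filter_weight)
    {
        int modifier = src_byte - pixel_value;

        modifier *= modifier;        /* squared difference */
        modifier >>= strength;       /* strength sets the falloff rate */
        modifier *= 3;

        if (modifier > 16)
            modifier = 16;           /* large differences are rejected */

        return (16 - modifier) * filter_weight;
    }

Each contributing frame adds weight*pixel to accumulator[k] and weight to count[k]; the rounded Q19 divide shown earlier then produces the filtered AltRef pixel.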