From 092b5bef37f87c77a048246d841ba6343c315176 Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 29 Nov 2010 14:21:11 -0500 Subject: [PATCH 1/4] abstract apply_temporal_filter allow for optimized versions of apply_temporal_filter (now vp8_apply_temporal_filter_c) the function was previously declared as static and appears to have been inlined. with this change, that's no longer possible. performance takes a small hit. the declaration for vp8cx_temp_filter_c was moved to onyx_if.c because of a circular dependency. for rtcd, temporal_filter.h holds the definition for the rtcd table, so it needs to be included by onyx_int.h. however, onyx_int.h holds the definition for VP8_COMP which is needed for the function prototype. blah. Change-Id: I499c055fdc652ac4659c21c5a55fe10ceb7e95e3 --- vp8/encoder/generic/csystemdependent.c | 2 + vp8/encoder/onyx_if.c | 1 + vp8/encoder/onyx_int.h | 2 + vp8/encoder/temporal_filter.c | 53 ++++++++++++++------------ vp8/encoder/temporal_filter.h | 29 +++++++++++++- 5 files changed, 60 insertions(+), 27 deletions(-) diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 824af5e46..898ad76fb 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -94,6 +94,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.search.full_search = vp8_full_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; + + cpi->rtcd.temporal.filter = vp8_apply_temporal_filter_c; #endif // Pure C: diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 279d50d54..b34633393 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -73,6 +73,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi); int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); +extern void
vp8cx_temp_filter_c(VP8_COMP *cpi); static void set_default_lf_deltas(VP8_COMP *cpi); diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 56938bec4..990ae1d9e 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -27,6 +27,7 @@ #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" #include "mcomp.h" +#include "temporal_filter.h" //#define SPEEDSTATS 1 #define MIN_GF_INTERVAL 4 @@ -228,6 +229,7 @@ typedef struct VP8_ENCODER_RTCD vp8_encodemb_rtcd_vtable_t encodemb; vp8_quantize_rtcd_vtable_t quantize; vp8_search_rtcd_vtable_t search; + vp8_temporal_rtcd_vtable_t temporal; } VP8_ENCODER_RTCD; enum diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index e4d47462f..31be76ec1 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -111,7 +111,7 @@ static void build_predictors_mb RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8); } } -static void apply_temporal_filter +void vp8_apply_temporal_filter_c ( unsigned char *frame1, unsigned int stride, @@ -440,32 +440,35 @@ static void vp8cx_temp_blur1_c predictor ); // Apply the filter (YUV) - apply_temporal_filter ( f->y_buffer + mb_y_offset, - f->y_stride, - predictor, - 16, - strength, - filter_weight[frame], - accumulator, - count ); + TEMPORAL_INVOKE(&cpi->rtcd.temporal, filter) + (f->y_buffer + mb_y_offset, + f->y_stride, + predictor, + 16, + strength, + filter_weight[frame], + accumulator, + count); - apply_temporal_filter ( f->u_buffer + mb_uv_offset, - f->uv_stride, - predictor + 256, - 8, - strength, - filter_weight[frame], - accumulator + 256, - count + 256 ); + TEMPORAL_INVOKE(&cpi->rtcd.temporal, filter) + (f->u_buffer + mb_uv_offset, + f->uv_stride, + predictor + 256, + 8, + strength, + filter_weight[frame], + accumulator + 256, + count + 256); - apply_temporal_filter ( f->v_buffer + mb_uv_offset, - f->uv_stride, - predictor + 320, - 8, - strength, - filter_weight[frame], - accumulator + 320, - 
count + 320 ); + TEMPORAL_INVOKE(&cpi->rtcd.temporal, filter) + (f->v_buffer + mb_uv_offset, + f->uv_stride, + predictor + 320, + 8, + strength, + filter_weight[frame], + accumulator + 320, + count + 320); } } diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h index f70e8c01e..3271f6e5a 100644 --- a/vp8/encoder/temporal_filter.h +++ b/vp8/encoder/temporal_filter.h @@ -12,8 +12,33 @@ #ifndef __INC_VP8_TEMPORAL_FILTER_H #define __INC_VP8_TEMPORAL_FILTER_H -#include "onyx_int.h" +#define prototype_filter(sym)\ + void (sym) \ + ( \ + unsigned char *frame1, \ + unsigned int stride, \ + unsigned char *frame2, \ + unsigned int block_size, \ + int strength, \ + int filter_weight, \ + unsigned int *accumulator, \ + unsigned int *count \ + ) -void vp8cx_temp_filter_c(VP8_COMP *cpi); +#ifndef vp8_temporal_filter +#define vp8_temporal_filter vp8_apply_temporal_filter_c +#endif +extern prototype_filter(vp8_temporal_filter); + +typedef struct +{ + prototype_filter(*filter); +} vp8_temporal_rtcd_vtable_t; + +#if CONFIG_RUNTIME_CPU_DETECT +#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn +#else +#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_##fn +#endif #endif // __INC_VP8_TEMPORAL_FILTER_H From 4b6219cb33392fb68f60ec4717a0228c4a453bdc Mon Sep 17 00:00:00 2001 From: Johann Date: Tue, 30 Nov 2010 10:23:43 -0500 Subject: [PATCH 2/4] temporal filter naming changes be more consistent with the naming pattern, especially wrt rtcd Change-Id: I3df50686a09f1dab0a9620b5adbb8a1577b40f2f --- vp8/encoder/generic/csystemdependent.c | 2 +- vp8/encoder/onyx_if.c | 8 ++--- vp8/encoder/temporal_filter.c | 47 +++++++++++++------------- vp8/encoder/temporal_filter.h | 12 +++---- 4 files changed, 35 insertions(+), 34 deletions(-) diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 898ad76fb..be00d0218 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -95,7 +95,7 @@ void
vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.search.full_search = vp8_full_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; - cpi->rtcd.temporal.filter = vp8_apply_temporal_filter_c; + cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; #endif // Pure C: diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index b34633393..1f890790c 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -73,7 +73,7 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi); int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); -extern void vp8cx_temp_filter_c(VP8_COMP *cpi); +extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi); static void set_default_lf_deltas(VP8_COMP *cpi); @@ -4971,7 +4971,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon { int thiserr; cpi->oxcf.arnr_strength = i; - vp8cx_temp_filter_c(cpi); + vp8_temporal_filter_prepare_c(cpi); thiserr = vp8_calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer, &cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance)); @@ -4986,7 +4986,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon if (besti != -1) { cpi->oxcf.arnr_strength = besti; - vp8cx_temp_filter_c(cpi); + vp8_temporal_filter_prepare_c(cpi); s = &cpi->alt_ref_buffer; // FWG not sure if I need to copy this data for the Alt Ref frame @@ -4998,7 +4998,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon s = &cpi->src_buffer[cpi->last_alt_ref_sei]; #else - vp8cx_temp_filter_c(cpi); + vp8_temporal_filter_prepare_c(cpi); s = &cpi->alt_ref_buffer; // FWG not sure if I need to copy this data for the Alt Ref frame diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 31be76ec1..8290ef672 100644 --- 
a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -59,7 +59,7 @@ static int modifier_lut[7][19] = {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} }; #endif -static void build_predictors_mb +static void vp8_temporal_filter_predictors_mb_c ( MACROBLOCKD *x, unsigned char *y_mb_ptr, @@ -111,7 +111,7 @@ static void build_predictors_mb RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8); } } -void vp8_apply_temporal_filter_c +void vp8_temporal_filter_apply_c ( unsigned char *frame1, unsigned int stride, @@ -171,7 +171,7 @@ void vp8_apply_temporal_filter_c #if ALT_REF_MC_ENABLED static int dummy_cost[2*mv_max+1]; -static int find_matching_mb +static int vp8_temporal_filter_find_matching_mb_c ( VP8_COMP *cpi, YV12_BUFFER_CONFIG *arf_frame, @@ -308,7 +308,7 @@ static int find_matching_mb } #endif -static void vp8cx_temp_blur1_c +static void vp8_temporal_filter_iterate_c ( VP8_COMP *cpi, int frame_count, @@ -412,11 +412,12 @@ static void vp8cx_temp_blur1_c #define THRESH_HIGH 20000 // Correlation has been lost try MC - err = find_matching_mb ( cpi, - cpi->frames[alt_ref_index], - cpi->frames[frame], - mb_y_offset, - THRESH_LOW ); + err = vp8_temporal_filter_find_matching_mb_c + (cpi, + cpi->frames[alt_ref_index], + cpi->frames[frame], + mb_y_offset, + THRESH_LOW); if (filter_weight[frame] < 2) { @@ -429,18 +430,18 @@ static void vp8cx_temp_blur1_c if (filter_weight[frame] != 0) { // Construct the predictors - build_predictors_mb ( - mbd, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->u_buffer + mb_uv_offset, - cpi->frames[frame]->v_buffer + mb_uv_offset, - cpi->frames[frame]->y_stride, - mbd->block[0].bmi.mv.as_mv.row, - mbd->block[0].bmi.mv.as_mv.col, - predictor ); + vp8_temporal_filter_predictors_mb_c + (mbd, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->u_buffer + mb_uv_offset, + cpi->frames[frame]->v_buffer + mb_uv_offset, + cpi->frames[frame]->y_stride, + 
mbd->block[0].bmi.mv.as_mv.row, + mbd->block[0].bmi.mv.as_mv.col, + predictor); // Apply the filter (YUV) - TEMPORAL_INVOKE(&cpi->rtcd.temporal, filter) + TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) (f->y_buffer + mb_y_offset, f->y_stride, predictor, @@ -450,7 +451,7 @@ static void vp8cx_temp_blur1_c accumulator, count); - TEMPORAL_INVOKE(&cpi->rtcd.temporal, filter) + TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) (f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, @@ -460,7 +461,7 @@ static void vp8cx_temp_blur1_c accumulator + 256, count + 256); - TEMPORAL_INVOKE(&cpi->rtcd.temporal, filter) + TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) (f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 320, @@ -537,7 +538,7 @@ static void vp8cx_temp_blur1_c mbd->pre.v_buffer = v_buffer; } -void vp8cx_temp_filter_c +void vp8_temporal_filter_prepare_c ( VP8_COMP *cpi ) @@ -645,7 +646,7 @@ void vp8cx_temp_filter_c = &cpi->src_buffer[which_buffer].source_buffer; } - vp8cx_temp_blur1_c ( + vp8_temporal_filter_iterate_c ( cpi, frames_to_blur, frames_to_blur_backward, diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h index 3271f6e5a..7b8c21c04 100644 --- a/vp8/encoder/temporal_filter.h +++ b/vp8/encoder/temporal_filter.h @@ -12,7 +12,7 @@ #ifndef __INC_VP8_TEMPORAL_FILTER_H #define __INC_VP8_TEMPORAL_FILTER_H -#define prototype_filter(sym)\ +#define prototype_apply(sym)\ void (sym) \ ( \ unsigned char *frame1, \ @@ -25,20 +25,20 @@ unsigned int *count \ ) -#ifndef vp8_temporal_filter -#define vp8_temporal_filter vp8_apply_temporal_filter_c +#ifndef vp8_temporal_filter_apply +#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c #endif -extern prototype_filter(vp8_temporal_filter); +extern prototype_apply(vp8_temporal_filter_apply); typedef struct { - prototype_filter(*filter); + prototype_apply(*apply); } vp8_temporal_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT #define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn #else -#define TEMPORAL_INVOKE(ctx,fn) 
vp8_temporal_##fn +#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_filter_##fn #endif #endif // __INC_VP8_TEMPORAL_FILTER_H From 20b855c33e41363a680d463ac6dba7fe07ffe712 Mon Sep 17 00:00:00 2001 From: Johann Date: Wed, 22 Dec 2010 11:15:56 -0500 Subject: [PATCH 3/4] improve integer version of filter the lookup table is based on floating point calculations (see source) by moving the *3 before the downshift and adding the rounding bit, the delta (LUT - integer) goes from: ______________________________________ __ 1__ 1______________________________ __ 1__ 1______________________________ ____ 1______ 1________________________ ____ 1 2__ 2 1________________________ ______ 1 1 2__ 2__ 2__ 2 1 1__________ ________ 1 1 2 2__ 1 2 3 1 2__ 2__ 2__ to: __-1__-1______________________________ ______________________________________ ____-1______-1________________________ ______________________________________ ________-1______________-1____________ ______________________________________ it's important to be able to use the integer version because the LUT more or less precludes SIMD optimizations Change-Id: I45a81127dc7b72a06fba951649135d9d918386c0 --- vp8/encoder/temporal_filter.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 8290ef672..2fffaa95f 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -36,27 +36,34 @@ #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering +#define USE_FILTER_LUT 0 // use lookup table to improve filter -#define USE_FILTER_LUT 1 #if VP8_TEMPORAL_ALT_REF #if USE_FILTER_LUT +// for (strength = 0; strength <= 6; strength++) { +// for (delta = 0; delta <= 18; delta++) { +// float coeff = (3.0 * delta * delta) / pow(2, strength); +// printf("%3d", (int)roundf(coeff > 16 ? 
0 : 16-coeff)); +// } +// printf("\n"); +// } static int modifier_lut[7][19] = { // Strength=0 - {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Strength=1 - {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Strength=2 - {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Strength=3 - {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Strength=4 - {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Strength=5 - {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0}, + {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0}, // Strength=6 - {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} + {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} }; #endif static void vp8_temporal_filter_predictors_mb_c @@ -140,16 +147,14 @@ void vp8_temporal_filter_apply_c int pixel_value = *frame2++; #if USE_FILTER_LUT - // LUT implementation -- - // improves precision of filter modifier = abs(src_byte-pixel_value); modifier = modifier>18 ? 0 : lut[modifier]; #else - modifier = src_byte; - modifier -= pixel_value; + modifier = src_byte - pixel_value; modifier *= modifier; - modifier >>= strength; modifier *= 3; + modifier += 1 << (strength - 1); + modifier >>= strength; if (modifier > 16) modifier = 16; From 74e8446e586380597441094bb9b4d82933fb305d Mon Sep 17 00:00:00 2001 From: James Berry Date: Thu, 23 Dec 2010 14:47:56 -0500 Subject: [PATCH 4/4] vpxenc stats_close() memleak fix stats_close() was not freeing memory for single pass runs. It now takes in arg_passes to determine when it should free memory. 
Change-Id: I6623b7e30b76f9bf2e16008490f9b20484d03f31 --- vpxenc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vpxenc.c b/vpxenc.c index 5e4fe3f0f..cb911bec0 100755 --- a/vpxenc.c +++ b/vpxenc.c @@ -186,11 +186,11 @@ int stats_open_mem(stats_io_t *stats, int pass) } -void stats_close(stats_io_t *stats) +void stats_close(stats_io_t *stats, int last_pass) { if (stats->file) { - if (stats->pass == 1) + if (stats->pass == last_pass) { #if 0 #elif USE_POSIX_MMAP @@ -205,7 +205,7 @@ void stats_close(stats_io_t *stats) } else { - if (stats->pass == 1) + if (stats->pass == last_pass) free(stats->buf.buf); } } @@ -1692,7 +1692,7 @@ int main(int argc, const char **argv_) } fclose(outfile); - stats_close(&stats); + stats_close(&stats, arg_passes-1); fprintf(stderr, "\n"); if (one_pass_only)