diff --git a/libs.mk b/libs.mk index c09581cc3..ae86fbd9e 100644 --- a/libs.mk +++ b/libs.mk @@ -198,7 +198,7 @@ libvpx.ver: $(call enabled,CODEC_EXPORTS) $(qexec)echo "local: *; };" >> $@ CLEAN-OBJS += libvpx.ver -$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)): +$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)): $(DIST_DIR)/$(LIBSUBDIR)/$(LIBVPX_SO) @echo " [LN] $@" $(qexec)ln -sf $(LIBVPX_SO) $@ diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 571fac17c..dcd2a2f49 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -214,11 +214,25 @@ static int frame_max_bits(VP8_COMP *cpi) int max_bits; // For CBR we need to also consider buffer fullness. + // If we are running below the optimal level then we need to gradually tighten up on max_bits. if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - max_bits = 2 * cpi->av_per_frame_bandwidth; - max_bits -= cpi->buffered_av_per_frame_bandwidth; - max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0); + double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); + + // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user + max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + + // If our buffer is below the optimum level + if (buffer_fullness_ratio < 1.0) + { + // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. + int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; + + max_bits = (int)(max_bits * buffer_fullness_ratio); + + if (max_bits < min_max_bits) + max_bits = min_max_bits; // Lowest value we will set ... which should allow the buffer to refil. + } } // VBR else @@ -235,45 +249,6 @@ static int frame_max_bits(VP8_COMP *cpi) } -static int gf_group_max_bits(VP8_COMP *cpi) -{ - // Max allocation for a golden frame group - int max_bits; - - // For CBR we need to also consider buffer fullness. - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) - { - max_bits = cpi->av_per_frame_bandwidth * cpi->baseline_gf_interval; - if (max_bits > cpi->oxcf.optimal_buffer_level) - { - max_bits -= cpi->oxcf.optimal_buffer_level; - max_bits += cpi->buffer_level; - } - else - { - max_bits -= (cpi->buffered_av_per_frame_bandwidth - - cpi->av_per_frame_bandwidth) - * cpi->baseline_gf_interval; - } - - max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0); - } - else - { - // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user - max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); - max_bits *= cpi->baseline_gf_interval; - } - - - // Trap case where we are out of bits - if (max_bits < 0) - max_bits = 0; - - return max_bits; -} - - static void output_stats(const VP8_COMP *cpi, struct vpx_codec_pkt_list *pktlist, FIRSTPASS_STATS *stats) @@ -1358,7 +1333,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double abs_mv_in_out_accumulator = 0.0; double mod_err_per_mb_accumulator = 0.0; - int max_group_bits; + int max_bits = frame_max_bits(cpi); // Max for a single frame unsigned int allow_alt_ref = cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; @@ -1711,9 +1686,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; // Clip cpi->twopass.gf_group_bits based on user supplied data rate variability limit (cpi->oxcf.two_pass_vbrmax_section) - max_group_bits = gf_group_max_bits(cpi); - if (cpi->twopass.gf_group_bits > max_group_bits) - cpi->twopass.gf_group_bits = max_group_bits; + if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) + cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; // Reset the file position reset_fpf_position(cpi, start_pos); @@ -1808,6 +1782,13 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } } + // Apply an additional limit for CBR + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + { + if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1)) + cpi->twopass.gf_bits = cpi->buffer_level >> 1; + } + // Dont allow a negative value for gf_bits if (gf_bits < 0) gf_bits = 0; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 2d1b5b8b3..3b86ba041 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1543,7 +1543,6 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; - cpi->buffered_av_per_frame_bandwidth = cpi->av_per_frame_bandwidth; cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; @@ -1639,7 +1638,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) break; } - if (cpi->pass == 0 && cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER) + if (cpi->pass == 0) cpi->auto_worst_q = 1; cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; @@ -3529,8 +3528,7 @@ static void encode_frame_to_data_rate // For CBR if the buffer reaches its maximum level then we can no longer // save up bits for later frames so we might as well use them up // on the current frame. - if (cpi->pass == 2 - && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && + if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) { int Adjustment = cpi->active_worst_quality / 4; // Max adjustment is 1/4 @@ -3621,9 +3619,6 @@ static void encode_frame_to_data_rate } else { - if(cpi->pass != 2) - Q = cpi->avg_frame_qindex; - cpi->active_best_quality = inter_minq[Q]; // For the constant/constrained quality mode we dont want @@ -3936,16 +3931,15 @@ static void encode_frame_to_data_rate (cpi->active_worst_quality < cpi->worst_quality) && (cpi->projected_frame_size > frame_over_shoot_limit)) { - /* step down active_worst_quality such that the corresponding - * active_best_quality will be equal to the current - * active_worst_quality + 1 - */ - int i; + int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; - for(i=cpi->active_worst_quality; iworst_quality; i++) - if(inter_minq[i] >= cpi->active_worst_quality + 1) - break; - cpi->active_worst_quality = i; + // If so is there any scope for relaxing it + while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) + { + cpi->active_worst_quality++; + top_index = cpi->active_worst_quality; + over_size_percent = (int)(over_size_percent * 0.96); // Assume 1 qstep = about 4% on frame size. + } // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop. active_worst_qchanged = TRUE; @@ -4333,9 +4327,10 @@ static void encode_frame_to_data_rate // Update the buffer level variable. // Non-viewable frames are a special case and are treated as pure overhead. - if ( cm->show_frame ) - cpi->bits_off_target += cpi->av_per_frame_bandwidth; - cpi->bits_off_target -= cpi->projected_frame_size; + if ( !cm->show_frame ) + cpi->bits_off_target -= cpi->projected_frame_size; + else + cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass. cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; @@ -4349,33 +4344,7 @@ static void encode_frame_to_data_rate // Debug stats cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); - // Update the buffered average bitrate - { - long long numerator; - - numerator = cpi->oxcf.maximum_buffer_size - - cpi->buffered_av_per_frame_bandwidth - + cpi->projected_frame_size; - numerator *= cpi->buffered_av_per_frame_bandwidth; - cpi->buffered_av_per_frame_bandwidth = numerator - / cpi->oxcf.maximum_buffer_size; - } - - { - long long tmp = (long long)cpi->buffered_av_per_frame_bandwidth - * cpi->oxcf.maximum_buffer_size - / cpi->av_per_frame_bandwidth; - cpi->buffer_level = cpi->oxcf.maximum_buffer_size - - tmp - + cpi->oxcf.optimal_buffer_level; - } - - // Accumulate overshoot error. - cpi->accumulated_overshoot += - (cpi->projected_frame_size > cpi->av_per_frame_bandwidth) - ? cpi->projected_frame_size - cpi->av_per_frame_bandwidth - : 0; - + cpi->buffer_level = cpi->bits_off_target; // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames if (cm->frame_type == KEY_FRAME) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 15fc1947d..8bd11a1f1 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -355,10 +355,6 @@ typedef struct VP8_COMP int per_frame_bandwidth; // Current section per frame bandwidth target int av_per_frame_bandwidth; // Average frame size target for clip int min_frame_bandwidth; // Minimum allocation that should be used for any frame - int buffered_av_per_frame_bandwidth; // Average bitrate over the last buffer - int buffered_av_per_frame_bandwidth_rem; // Average bitrate remainder - int accumulated_overshoot; // Accumulated # of bits spent > target - int inter_frame_target; double output_frame_rate; long long last_time_stamp_seen; diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 642660a0e..78b3b4715 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -650,10 +650,10 @@ static void calc_gf_params(VP8_COMP *cpi) static void calc_pframe_target_size(VP8_COMP *cpi) { - int min_frame_target, max_frame_target; + int min_frame_target; int Adjustment; - min_frame_target = 1; + min_frame_target = 0; if (cpi->pass == 2) { @@ -661,19 +661,10 @@ static void calc_pframe_target_size(VP8_COMP *cpi) if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5)) min_frame_target = cpi->av_per_frame_bandwidth >> 5; - - max_frame_target = INT_MAX; } - else - { - if (min_frame_target < cpi->per_frame_bandwidth / 4) - min_frame_target = cpi->per_frame_bandwidth / 4; + else if (min_frame_target < cpi->per_frame_bandwidth / 4) + min_frame_target = cpi->per_frame_bandwidth / 4; - /* Don't allow the target to completely deplete the buffer. */ - max_frame_target = cpi->buffer_level + cpi->av_per_frame_bandwidth; - if(max_frame_target < min_frame_target) - max_frame_target = min_frame_target; - } // Special alt reference frame case if (cpi->common.refresh_alt_ref_frame) @@ -1166,32 +1157,6 @@ static void calc_pframe_target_size(VP8_COMP *cpi) } } - - if (cpi->pass==0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER){ - /* determine the accumulated error to apply to this frame. Apply - * more of the error when we've been undershooting, less when - * we've been overshooting - */ - long long adjust; - int bitrate_error; - - bitrate_error = cpi->av_per_frame_bandwidth - - cpi->buffered_av_per_frame_bandwidth; - - adjust = cpi->accumulated_overshoot; - adjust *= cpi->av_per_frame_bandwidth + bitrate_error; - adjust /= cpi->oxcf.maximum_buffer_size; - if (adjust > (cpi->this_frame_target - min_frame_target)) - adjust = (cpi->this_frame_target - min_frame_target); - else if (adjust < 0) - adjust = 0; - - cpi->this_frame_target -= adjust; - cpi->accumulated_overshoot -= adjust; - } - - if(cpi->this_frame_target > max_frame_target) - cpi->this_frame_target = max_frame_target; } diff --git a/vpxenc.c b/vpxenc.c index bdecaef62..8cc66a94f 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "vpx/vpx_encoder.h" #if USE_POSIX_MMAP #include @@ -913,12 +914,16 @@ static const arg_def_t framerate = ARG_DEF(NULL, "fps", 1, "Stream frame rate (rate/scale)"); static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0, "Output IVF (default is WebM)"); +static const arg_def_t q_hist_n = ARG_DEF(NULL, "q-hist", 1, + "Show quantizer histogram (n-buckets)"); +static const arg_def_t rate_hist_n = ARG_DEF(NULL, "rate-hist", 1, + "Show rate histogram (n-buckets)"); static const arg_def_t *main_args[] = { &debugmode, &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl, - &verbosearg, &psnrarg, &use_ivf, + &verbosearg, &psnrarg, &use_ivf, &q_hist_n, &rate_hist_n, NULL }; @@ -1111,6 +1116,281 @@ static void usage_exit() exit(EXIT_FAILURE); } + +#define HIST_BAR_MAX 40 +struct hist_bucket +{ + int low, high, count; +}; + + +static int merge_hist_buckets(struct hist_bucket *bucket, + int *buckets_, + int max_buckets) +{ + int small_bucket = 0, merge_bucket = INT_MAX, big_bucket=0; + int buckets = *buckets_; + int i; + + /* Find the extrema for this list of buckets */ + big_bucket = small_bucket = 0; + for(i=0; i < buckets; i++) + { + if(bucket[i].count < bucket[small_bucket].count) + small_bucket = i; + if(bucket[i].count > bucket[big_bucket].count) + big_bucket = i; + } + + /* If we have too many buckets, merge the smallest with an ajacent + * bucket. + */ + while(buckets > max_buckets) + { + int last_bucket = buckets - 1; + + // merge the small bucket with an adjacent one. + if(small_bucket == 0) + merge_bucket = 1; + else if(small_bucket == last_bucket) + merge_bucket = last_bucket - 1; + else if(bucket[small_bucket - 1].count < bucket[small_bucket + 1].count) + merge_bucket = small_bucket - 1; + else + merge_bucket = small_bucket + 1; + + assert(abs(merge_bucket - small_bucket) <= 1); + assert(small_bucket < buckets); + assert(big_bucket < buckets); + assert(merge_bucket < buckets); + + if(merge_bucket < small_bucket) + { + bucket[merge_bucket].high = bucket[small_bucket].high; + bucket[merge_bucket].count += bucket[small_bucket].count; + } + else + { + bucket[small_bucket].high = bucket[merge_bucket].high; + bucket[small_bucket].count += bucket[merge_bucket].count; + merge_bucket = small_bucket; + } + + assert(bucket[merge_bucket].low != bucket[merge_bucket].high); + + buckets--; + + /* Remove the merge_bucket from the list, and find the new small + * and big buckets while we're at it + */ + big_bucket = small_bucket = 0; + for(i=0; i < buckets; i++) + { + if(i > merge_bucket) + bucket[i] = bucket[i+1]; + + if(bucket[i].count < bucket[small_bucket].count) + small_bucket = i; + if(bucket[i].count > bucket[big_bucket].count) + big_bucket = i; + } + + } + + *buckets_ = buckets; + return bucket[big_bucket].count; +} + + +static void show_histogram(const struct hist_bucket *bucket, + int buckets, + int total, + int scale) +{ + const char *pat1, *pat2; + int i; + + switch((int)(log(bucket[buckets-1].high)/log(10))+1) + { + case 1: + case 2: + pat1 = "%4d %2s: "; + pat2 = "%4d-%2d: "; + break; + case 3: + pat1 = "%5d %3s: "; + pat2 = "%5d-%3d: "; + break; + case 4: + pat1 = "%6d %4s: "; + pat2 = "%6d-%4d: "; + break; + case 5: + pat1 = "%7d %5s: "; + pat2 = "%7d-%5d: "; + break; + case 6: + pat1 = "%8d %6s: "; + pat2 = "%8d-%6d: "; + break; + case 7: + pat1 = "%9d %7s: "; + pat2 = "%9d-%7d: "; + break; + default: + pat1 = "%12d %10s: "; + pat2 = "%12d-%10d: "; + break; + } + + for(i=0; isamples = cfg->rc_buf_sz * 60 / 1000; // max 60 fps + hist->pts = calloc(hist->samples, sizeof(*hist->pts)); + hist->sz = calloc(hist->samples, sizeof(*hist->sz)); + for(i=0; ibucket[i].low = INT_MAX; + hist->bucket[i].high = 0; + hist->bucket[i].count = 0; + } +} + + +static void destroy_rate_histogram(struct rate_hist *hist) +{ + free(hist->pts); + free(hist->sz); +} + + +static void update_rate_histogram(struct rate_hist *hist, + const vpx_codec_enc_cfg_t *cfg, + const vpx_codec_cx_pkt_t *pkt) +{ + int i, idx; + int64_t now, then, sum_sz = 0, avg_bitrate; + + now = pkt->data.frame.pts * 1000 + * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den; + + idx = hist->frames++ % hist->samples; + hist->pts[idx] = now; + hist->sz[idx] = pkt->data.frame.sz; + + if(now < cfg->rc_buf_initial_sz) + return; + + /* Sum the size over the past rc_buf_sz ms */ + for(i = hist->frames; i > 0; i--) + { + int i_idx = (i-1) % hist->samples; + + then = hist->pts[i_idx]; + if(now - then > cfg->rc_buf_sz) + break; + sum_sz += hist->sz[i_idx]; + } + + avg_bitrate = sum_sz * 8 * 1000 / (now - then); + idx = avg_bitrate * (RATE_BINS/2) / (cfg->rc_target_bitrate * 1000); + if(idx < 0) + idx = 0; + if(idx > RATE_BINS-1) + idx = RATE_BINS-1; + if(hist->bucket[idx].low > avg_bitrate) + hist->bucket[idx].low = avg_bitrate; + if(hist->bucket[idx].high < avg_bitrate) + hist->bucket[idx].high = avg_bitrate; + hist->bucket[idx].count++; + hist->total++; +} + + +static void show_rate_histogram(struct rate_hist *hist, + const vpx_codec_enc_cfg_t *cfg, + int max_buckets) +{ + int i, scale; + int buckets = 0; + + for(i = 0; i < RATE_BINS; i++) + { + if(hist->bucket[i].low == INT_MAX) + continue; + hist->bucket[buckets++] = hist->bucket[i]; + } + + fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz); + scale = merge_hist_buckets(hist->bucket, &buckets, max_buckets); + show_histogram(hist->bucket, buckets, hist->total, scale); +} + #define ARG_CTRL_CNT_MAX 10 int main(int argc, const char **argv_) @@ -1149,6 +1429,10 @@ int main(int argc, const char **argv_) double psnr_totals[4] = {0, 0, 0, 0}; int psnr_count = 0; stereo_format_t stereo_fmt = STEREO_FORMAT_MONO; + int counts[64]={0}; + int show_q_hist_buckets=0; + int show_rate_hist_buckets=0; + struct rate_hist rate_hist={0}; exec_name = argv_[0]; ebml.last_pts_ms = -1; @@ -1232,6 +1516,10 @@ int main(int argc, const char **argv_) out_fn = arg.val; else if (arg_match(&arg, &debugmode, argi)) ebml.debug = 1; + else if (arg_match(&arg, &q_hist_n, argi)) + show_q_hist_buckets = arg_parse_uint(&arg); + else if (arg_match(&arg, &rate_hist_n, argi)) + show_rate_hist_buckets = arg_parse_uint(&arg); else argj++; } @@ -1419,6 +1707,8 @@ int main(int argc, const char **argv_) memset(&stats, 0, sizeof(stats)); + init_rate_histogram(&rate_hist, &cfg); + for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++) { int frames_in = 0, frames_out = 0; @@ -1656,6 +1946,16 @@ int main(int argc, const char **argv_) vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); ctx_exit_on_error(&encoder, "Failed to encode frame"); + + if(cfg.g_pass != VPX_RC_FIRST_PASS) + { + int q; + + vpx_codec_control(&encoder, VP8E_GET_LAST_QUANTIZER_64, &q); + ctx_exit_on_error(&encoder, "Failed to read quantizer"); + counts[q]++; + } + got_data = 0; while ((pkt = vpx_codec_get_cx_data(&encoder, &iter))) @@ -1669,6 +1969,7 @@ int main(int argc, const char **argv_) fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz); + update_rate_histogram(&rate_hist, &cfg, pkt); if(write_webm) { /* Update the hash */ @@ -1766,6 +2067,13 @@ int main(int argc, const char **argv_) break; } + if (show_q_hist_buckets) + show_q_histogram(counts, show_q_hist_buckets); + + if (show_rate_hist_buckets) + show_rate_histogram(&rate_hist, &cfg, show_rate_hist_buckets); + destroy_rate_histogram(&rate_hist); + vpx_img_free(&raw); free(argv); return EXIT_SUCCESS;