/* File: vpx/vp9/encoder/vp9_encoder.h */
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VP9_ENCODER_VP9_ENCODER_H_
#define VP9_ENCODER_VP9_ENCODER_H_
/* Dependencies. */
#include <stdio.h>
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
#include "vp9/common/vp9_ppflags.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
#include "vp9/encoder/vp9_denoiser.h"
#endif
/* C linkage when this header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
// Default GF interval.
// NOTE(review): "GF" presumably stands for golden frame and the unit is
// presumably frames -- confirm against the users of this macro.
#define DEFAULT_GF_INTERVAL 10
/* Encoder data types. */
typedef struct {
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
2010-05-18 11:58:33 -04:00
vp9_prob segment_pred_probs[PREDICTION_PROBS];
2010-05-18 11:58:33 -04:00
unsigned char *last_frame_seg_map_copy;
// 0 = Intra, Last, GF, ARF
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];
// 0 = ZERO_MV, MV
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
2010-05-18 11:58:33 -04:00
FRAME_CONTEXT fc;
2010-05-18 11:58:33 -04:00
} CODING_CONTEXT;
// State of the encode_breakout feature.
typedef enum {
  ENCODE_BREAKOUT_DISABLED = 0,  // Feature is off.
  ENCODE_BREAKOUT_ENABLED = 1,   // Feature is on.
  ENCODE_BREAKOUT_LIMITED = 2    // Feature is on, with a small max_thresh
                                 // limit.
} ENCODE_BREAKOUT_TYPE;
// Scaling mode selector; passed as horiz_mode / vert_mode to
// vp9_set_internal_size().
// NOTE(review): the enumerator names suggest ratios of 1/1, 4/5, 3/5 and
// 1/2 -- confirm against the code that consumes these values.
typedef enum {
  NORMAL = 0,
  FOURFIVE = 1,
  THREEFIVE = 2,
  ONETWO = 3
} VPX_SCALING;
// Encoder operating mode (quality versus encoding-time trade-off).
typedef enum {
  // Good quality, fast encoding: quality is balanced against the time taken
  // to produce the output. The speed setting controls how fast.
  GOOD = 0,

  // Best quality: output quality takes priority over encoding speed, giving
  // the highest possible quality at the longest encode time. The speed
  // setting is ignored.
  BEST = 1,

  // Realtime / live encoding (for example, capturing a television signal or
  // a feed from a live camera). The speed setting controls how fast.
  REALTIME = 2
} MODE;
// Bit flags describing a frame's type. Each enumerator occupies its own bit,
// so values may be OR-ed together.
// No trailing comma after the last enumerator: that keeps this enum
// consistent with the other enums in this header and acceptable to strict
// C89 / C++98 compilers (this header is also included from C++).
typedef enum {
  FRAMEFLAGS_KEY = 1 << 0,
  FRAMEFLAGS_GOLDEN = 1 << 1,
  FRAMEFLAGS_ALTREF = 1 << 2
} FRAMETYPE_FLAGS;
// Adaptive quantization (AQ) mode; stored in VP9EncoderConfig::aq_mode.
typedef enum {
  NO_AQ = 0,
  VARIANCE_AQ = 1,
  COMPLEXITY_AQ = 2,
  CYCLIC_REFRESH_AQ = 3,
  // Number of modes. Must stay the last enumerator so that it keeps counting
  // the entries above it.
  AQ_MODE_COUNT
} AQ_MODE;
// Encoder configuration.
// An instance is passed to vp9_create_compressor() and vp9_change_config(),
// and VP9_COMP holds one as its `oxcf` member.
typedef struct VP9EncoderConfig {
BITSTREAM_PROFILE profile;
BIT_DEPTH bit_depth;
int width; // width of data passed to the compressor
int height; // height of data passed to the compressor
double init_framerate; // set to passed in framerate
int64_t target_bandwidth; // bandwidth to be used in kilobits per second
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0
int speed;
unsigned int rc_max_intra_bitrate_pct;
// Encoder operating mode; is_altref_enabled() requires mode != REALTIME.
MODE mode;
int pass;
// Key Framing Operations
int auto_key; // autodetect cut scenes and set the keyframes
int key_freq; // maximum distance to key frame.
// is_altref_enabled() requires lag_in_frames > 0.
int lag_in_frames; // how many frames lag before we start encoding
// ----------------------------------------------------------------
// DATARATE CONTROL OPTIONS
// vbr, cbr, constrained quality or constant quality
enum vpx_rc_mode rc_mode;
// buffer targeting aggressiveness
int under_shoot_pct;
int over_shoot_pct;
// buffering parameters
int64_t starting_buffer_level_ms;
int64_t optimal_buffer_level_ms;
int64_t maximum_buffer_size_ms;
// Frame drop threshold.
int drop_frames_water_mark;
// controlling quality
int fixed_q;
// is_lossless_requested() reports lossless when worst_allowed_q and
// best_allowed_q are both 0.
int worst_allowed_q;
int best_allowed_q;
int cq_level;
AQ_MODE aq_mode; // Adaptive Quantization mode
// Internal frame size scaling.
int allow_spatial_resampling;
int scaled_frame_width;
int scaled_frame_height;
// Enable feature to reduce the frame quantization every x frames.
int frame_periodic_boost;
// two pass datarate control
int two_pass_vbrbias; // two pass datarate control tweaks
int two_pass_vbrmin_section;
int two_pass_vbrmax_section;
// END DATARATE CONTROL OPTIONS
// ----------------------------------------------------------------
// Spatial and temporal scalability.
int ss_number_layers; // Number of spatial layers.
int ts_number_layers; // Number of temporal layers.
// Bitrate allocation for spatial layers.
int ss_target_bitrate[VPX_SS_MAX_LAYERS];
// Per-spatial-layer flag; is_altref_enabled() indexes it by
// svc.spatial_layer_id when spatial SVC is active.
int ss_play_alternate[VPX_SS_MAX_LAYERS];
// Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
int ts_target_bitrate[VPX_TS_MAX_LAYERS];
int ts_rate_decimator[VPX_TS_MAX_LAYERS];
// These parameters aren't to be used in final build; don't use!
// NOTE(review): the original comment does not say how many of the following
// fields it covers -- confirm before relying on any of them.
int play_alternate;
int encode_breakout; // early breakout : for video conf recommend 800
/* Bitfield defining the error resiliency features to enable.
* Can provide decodable frames after losses in previous
* frames and decodable partitions after losses in the same frame.
*/
unsigned int error_resilient_mode;
/* Bitfield defining the parallel decoding mode where the
* decoding in successive frames may be conducted in parallel
* just by decoding the frame headers.
*/
unsigned int frame_parallel_decoding_mode;
int arnr_max_frames;
int arnr_strength;
int arnr_type;
int tile_columns;
int tile_rows;
vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
// Only present when the build enables CONFIG_FP_MB_STATS.
#if CONFIG_FP_MB_STATS
vpx_fixed_buf_t firstpass_mb_stats_in;
#endif
vp8e_tuning tuning;
vp9e_tune_content content;
} VP9EncoderConfig;
// Returns 1 when the configuration requests lossless coding, i.e. when both
// quantizer bounds (best_allowed_q and worst_allowed_q) are zero; 0 otherwise.
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
  if (cfg->best_allowed_q != 0) {
    return 0;
  }
  return cfg->worst_allowed_q == 0;
}
typedef struct VP9_COMP {
QUANTS quants;
MACROBLOCK mb;
VP9_COMMON common;
VP9EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
struct lookahead_entry *alt_ref_source;
YV12_BUFFER_CONFIG *Source;
YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames
YV12_BUFFER_CONFIG *un_scaled_source;
YV12_BUFFER_CONFIG scaled_source;
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
2010-05-18 11:58:33 -04:00
int skippable_frame;
int scaled_ref_idx[3];
int lst_fb_idx;
int gld_fb_idx;
int alt_fb_idx;
int refresh_last_frame;
int refresh_golden_frame;
int refresh_alt_ref_frame;
int ext_refresh_frame_flags_pending;
int ext_refresh_last_frame;
int ext_refresh_golden_frame;
int ext_refresh_alt_ref_frame;
int ext_refresh_frame_context_pending;
int ext_refresh_frame_context;
YV12_BUFFER_CONFIG last_frame_uf;
2010-05-18 11:58:33 -04:00
TOKENEXTRA *tok;
unsigned int tok_count[4][1 << 6];
2010-05-18 11:58:33 -04:00
// Ambient reconstruction err target for force key frames
int ambient_err;
RD_OPT rd;
2010-05-18 11:58:33 -04:00
CODING_CONTEXT coding_context;
2010-05-18 11:58:33 -04:00
int *nmvcosts[2];
int *nmvcosts_hp[2];
int *nmvsadcosts[2];
int *nmvsadcosts_hp[2];
int zbin_mode_boost;
int zbin_mode_boost_enabled;
2010-05-18 11:58:33 -04:00
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
int64_t first_time_stamp_ever;
2010-05-18 11:58:33 -04:00
RATE_CONTROL rc;
double framerate;
2010-05-18 11:58:33 -04:00
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
32x32 transform for superblocks. This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds code all over the place to wrap that in the bitstream/encoder/decoder/RD. Some implementation notes (these probably need careful review): - token range is extended by 1 bit, since the value range out of this transform is [-16384,16383]. - the coefficients coming out of the FDCT are manually scaled back by 1 bit, or else they won't fit in int16_t (they are 17 bits). Because of this, the RD error scoring does not right-shift the MSE score by two (unlike for 4x4/8x8/16x16). - to compensate for this loss in precision, the quantizer is halved also. This is currently a little hacky. - FDCT and IDCT is double-only right now. Needs a fixed-point impl. - There are no default probabilities for the 32x32 transform yet; I'm simply using the 16x16 luma ones. A future commit will add newly generated probabilities for all transforms. - No ADST version. I don't think we'll add one for this level; if an ADST is desired, transform-size selection can scale back to 16x16 or lower, and use an ADST at that level. Additional notes specific to Debargha's DWT/DCT hybrid: - coefficient scale is different for the top/left 16x16 (DCT-over-DWT) block than for the rest (DWT pixel differences) of the block. Therefore, RD error scoring isn't easily scalable between coefficient and pixel domain. Thus, unfortunately, we need to compute the RD distortion in the pixel domain until we figure out how to scale these appropriately. Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 14:45:05 -08:00
struct vpx_codec_pkt_list *output_pkt_list;
MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
int mbgraph_n_frames; // number of frames filled in the above
int static_mb_pct; // % forced skip mbs by segmentation
int ref_frame_flags;
2010-05-18 11:58:33 -04:00
SPEED_FEATURES sf;
2010-05-18 11:58:33 -04:00
unsigned int max_mv_magnitude;
int mv_step_param;
// Default value is 1. From first pass stats, encode_breakout may be disabled.
ENCODE_BREAKOUT_TYPE allow_encode_breakout;
// Get threshold from external input. A suggested threshold is 800 for HD
// clips, and 300 for < HD clips.
int encode_breakout;
unsigned char *segmentation_map;
// segment threashold for encode breakout
int segment_encode_breakout[MAX_SEGMENTS];
2010-05-18 11:58:33 -04:00
unsigned char *complexity_map;
CYCLIC_REFRESH *cyclic_refresh;
fractional_mv_step_fp *find_fractional_mv_step;
vp9_full_search_fn_t full_search_sad;
vp9_refining_search_fn_t refining_search_sad;
vp9_diamond_search_fn_t diamond_search_sad;
vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
uint64_t time_receive_data;
uint64_t time_compress_data;
uint64_t time_pick_lpf;
uint64_t time_encode_sb_row;
2010-05-18 11:58:33 -04:00
#if CONFIG_FP_MB_STATS
int use_fp_mb_stats;
#endif
TWO_PASS twopass;
2010-05-18 11:58:33 -04:00
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
2010-05-18 11:58:33 -04:00
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
int count;
double total_y;
double total_u;
double total_v;
double total;
uint64_t total_sq_error;
uint64_t total_samples;
double totalp_y;
double totalp_u;
double totalp_v;
double totalp;
uint64_t totalp_sq_error;
uint64_t totalp_samples;
int bytes;
double summed_quality;
double summed_weights;
double summedp_quality;
double summedp_weights;
unsigned int tot_recode_hits;
double total_ssimg_y;
double total_ssimg_u;
double total_ssimg_v;
double total_ssimg_all;
int b_calculate_ssimg;
2010-05-18 11:58:33 -04:00
#endif
int b_calculate_psnr;
int droppable;
int dummy_packing; /* flag to indicate if packing is dummy */
unsigned int tx_stepdown_count[TX_SIZES];
Tx size selection enhancements (1) Refines the modeling function and uses that to add some speed features. Specifically, intead of using a flag use_largest_txfm as a speed feature, an enum tx_size_search_method is used, of which two of the types are USE_FULL_RD and USE_LARGESTALL. Two other new types are added: USE_LARGESTINTRA (use largest only for intra) USE_LARGESTINTRA_MODELINTER (use largest for intra, and model for inter) (2) Another change is that the framework for deciding transform type is simplified to use a heuristic count based method rather than an rd based method using txfm_cache. In practice the new method is found to work just as well - with derf only -0.01 down. The new method is more compatible with the new framework where certain rd costs are based on full rd and certain others are based on modeled rd or are not computed. In this patch the existing rd based method is still kept for use in the USE_FULL_RD mode. In the other modes, the count based method is used. However the recommendation is to remove it eventually since the benefit is limited, and will remove a lot of complications in the code (3) Finally a bug is fixed with the existing use_largest_txfm speed feature that causes mismatches when the lossless mode and 4x4 WH transform is forced. Results on derf: USE_FULL_RD: +0.03% (due to change in the tables), 0% encode time reduction USE_LARGESTINTRA: -0.21%, 15% encode time reduction (this one is a pretty good compromise) USE_LARGESTINTRA_MODELINTER: -0.98%, 22% encode time reduction (currently the benefit of modeling is limited for txfm size selection, but keeping this enum as a placeholder) . USE_LARGESTALL: -1.05%, 27% encode-time reduction (same as existing use_largest_txfm speed feature). Change-Id: I4d60a5f9ce78fbc90cddf2f97ed91d8bc0d4f936
2013-06-21 16:31:12 -07:00
int initial_width;
int initial_height;
int use_svc;
SVC svc;
// Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
diff *source_diff_var;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh;
int frames_till_next_var_check;
int frame_flags;
search_site_config ss_cfg;
int mbmode_cost[INTRA_MODES];
unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
PICK_MODE_CONTEXT *leaf_tree;
PC_TREE *pc_tree;
PC_TREE *pc_root;
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
int multi_arf_allowed;
int multi_arf_enabled;
int multi_arf_last_grp_enabled;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser;
#endif
} VP9_COMP;
/* Encoder entry points. */
// Encoder API declarations. The functions are defined elsewhere; only their
// signatures are visible in this header.

// Takes no arguments. Declared with (void) so that this is a real prototype:
// before C23 an empty parameter list leaves the parameters unspecified and
// disables argument checking at call sites.
void vp9_initialize_enc(void);

// Creation, teardown and reconfiguration of an encoder instance.
struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf);
void vp9_remove_compressor(VP9_COMP *cpi);

void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf);

// receive a frames worth of data. caller can assume that a copy of this
// frame is made and not just a copy of the pointer..
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
                          YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
                          int64_t end_time_stamp);

int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                            size_t *size, uint8_t *dest,
                            int64_t *time_stamp, int64_t *time_end, int flush);

int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                              vp9_ppflags_t *flags);

// Reference-frame control.
int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags);

void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);

int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                           YV12_BUFFER_CONFIG *sd);

int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
                          YV12_BUFFER_CONFIG *sd);

int vp9_update_entropy(VP9_COMP *cpi, int update);

int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);

// Frame-size control.
int vp9_set_internal_size(VP9_COMP *cpi,
                          VPX_SCALING horiz_mode, VPX_SCALING vert_mode);

int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
                         unsigned int height);

void vp9_set_svc(VP9_COMP *cpi, int use_svc);

int vp9_get_quantizer(struct VP9_COMP *cpi);
// Maps a reference frame to the encoder's reference slot index:
// LAST_FRAME -> lst_fb_idx, GOLDEN_FRAME -> gld_fb_idx, and any other
// value -> alt_fb_idx.
static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
                                    MV_REFERENCE_FRAME ref_frame) {
  return (ref_frame == LAST_FRAME)
             ? cpi->lst_fb_idx
             : (ref_frame == GOLDEN_FRAME) ? cpi->gld_fb_idx
                                           : cpi->alt_fb_idx;
}
// Returns a pointer to the frame buffer currently mapped to `ref_frame`.
// The lookup goes: ref_frame -> reference slot (get_ref_frame_idx) ->
// cm->ref_frame_map[slot] -> cm->frame_bufs[...].buf.
static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
    VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
  VP9_COMMON *const cm = &cpi->common;
  const int ref_slot = get_ref_frame_idx(cpi, ref_frame);
  return &cm->frame_bufs[cm->ref_frame_map[ref_slot]].buf;
}
// Returns the number of tokens to allocate for a frame of mb_rows x mb_cols
// macroblocks.
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
  // TODO(JBB): double check we can't exceed this token count if we have a
  // 32x32 transform crossing a boundary at a multiple of 16.

  // mb_rows and mb_cols are in units of 16 pixels. Budget per macroblock:
  // 16x16 pixels in each of 3 planes (all assumed to be at full resolution),
  // up to 1 token per pixel, plus a head room of 4.
  const int tokens_per_mb = 16 * 16 * 3 + 4;
  return mb_rows * mb_cols * tokens_per_mb;
}
// Further encoder helpers. These are declarations only; the definitions are
// not part of this header.
int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
void vp9_alloc_compressor_data(VP9_COMP *cpi);
void vp9_scale_references(VP9_COMP *cpi);
void vp9_update_reference_frames(VP9_COMP *cpi);
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
// NOTE(review): presumably returns `scaled` when scaling was needed and
// `unscaled` otherwise -- confirm against the definition.
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
// Returns 1 when SVC is in use with exactly one temporal layer and more than
// one spatial layer; 0 otherwise.
static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) {
  if (!cpi->use_svc) {
    return 0;
  }
  if (cpi->svc.number_temporal_layers != 1) {
    return 0;
  }
  return cpi->svc.number_spatial_layers > 1;
}
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
(cpi->oxcf.play_alternate &&
(!is_spatial_svc(cpi) ||
cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));
}
// Points xd->block_refs[0] and xd->block_refs[1] at the cm->frame_refs
// entries for ref0 and ref1. A reference below LAST_FRAME falls back to
// entry 0; otherwise the entry index is (ref - LAST_FRAME).
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
                                MV_REFERENCE_FRAME ref0,
                                MV_REFERENCE_FRAME ref1) {
  const int first_idx = (ref0 >= LAST_FRAME) ? ref0 - LAST_FRAME : 0;
  const int second_idx = (ref1 >= LAST_FRAME) ? ref1 - LAST_FRAME : 0;

  xd->block_refs[0] = &cm->frame_refs[first_idx];
  xd->block_refs[1] = &cm->frame_refs[second_idx];
}
// Returns the lowest bit of frame_index (0 or 1), so the result alternates
// from one frame index to the next.
static INLINE int get_chessboard_index(const int frame_index) {
  const int low_bit_mask = 0x1;
  return frame_index & low_bit_mask;
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_ENCODER_VP9_ENCODER_H_