From 093a32ffd73c1b47af0cd5d2600400b103e64229 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Tue, 10 Jun 2014 16:43:59 -0700 Subject: [PATCH] Reworks high-bit-depth profiles Splits profile 2 into Profile 2 and 3, where profile 2 only supports 420 sampling, while profile 3 adds 422/444 and alpha. Keeps room for further expansion. Also makes some minor changes in the decoder parameters, replacing --convert-to-8bit with --output-bit-depth. Change-Id: I713525880512de6c36698d212795db1543c1d0dd --- vp9/common/vp9_enums.h | 11 +++++--- vp9/decoder/vp9_decodeframe.c | 10 +++++--- vp9/decoder/vp9_decodeframe.h | 3 +++ vp9/encoder/vp9_bitstream.c | 28 +++++++++++++++++---- vp9/vp9_dx_iface.c | 13 +++++----- vpxdec.c | 47 ++++++++++++++++------------------- vpxenc.c | 19 ++++++++------ 7 files changed, 80 insertions(+), 51 deletions(-) diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index bac63615e..1c390b188 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -26,14 +26,17 @@ extern "C" { #define MI_MASK (MI_BLOCK_SIZE - 1) // Bitstream profiles indicated by 2 bits in the uncompressed header. -// 00: Profile 0. 4:2:0 only. -// 10: Profile 1. adds 4:4:4, 4:2:2, alpha. -// 01: Profile 2. Supports 10-bit and 12-bit color only. -// 11: Undefined profile. +// 00: Profile 0. 8-bit 4:2:0 only. +// 10: Profile 1. Adds 4:4:4, 4:2:2, alpha to Profile 0. +// 01: Profile 2. Supports 10-bit and 12-bit color only, with 4:2:0 sampling. +// 110: Profile 3. Supports 10-bit and 12-bit color only, with 4:2:2/4:4:4 +// sampling and alpha. +// 111: Undefined profile. 
typedef enum BITSTREAM_PROFILE { PROFILE_0, PROFILE_1, PROFILE_2, + PROFILE_3, MAX_PROFILES } BITSTREAM_PROFILE; diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index a3715e812..3c59d231a 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1109,9 +1109,11 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { +BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) { int profile = vp9_rb_read_bit(rb); profile |= vp9_rb_read_bit(rb) << 1; + if (profile > 2) + profile += vp9_rb_read_bit(rb); return (BITSTREAM_PROFILE) profile; } @@ -1127,7 +1129,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->profile = read_profile(rb); + cm->profile = vp9_read_profile(rb); if (cm->profile >= MAX_PROFILES) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Unsupported bitstream profile"); @@ -1173,7 +1175,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->profile >= PROFILE_1) { + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1181,7 +1183,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->profile >= PROFILE_1) { + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h index fb15645a9..4a94f62a6 100644 --- 
a/vp9/decoder/vp9_decodeframe.h +++ b/vp9/decoder/vp9_decodeframe.h @@ -18,6 +18,7 @@ extern "C" { struct VP9Common; struct VP9Decoder; +struct vp9_read_bit_buffer; void vp9_init_dequantizer(struct VP9Common *cm); @@ -25,6 +26,8 @@ void vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end); +BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 476a06c96..43e235c89 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1036,8 +1036,27 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) { static void write_profile(BITSTREAM_PROFILE profile, struct vp9_write_bit_buffer *wb) { assert(profile < MAX_PROFILES); - vp9_wb_write_bit(wb, profile & 1); - vp9_wb_write_bit(wb, profile >> 1); + switch (profile) { + case PROFILE_0: + vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 0); + break; + case PROFILE_1: + vp9_wb_write_bit(wb, 1); + vp9_wb_write_bit(wb, 0); + break; + case PROFILE_2: + vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 1); + break; + case PROFILE_3: + vp9_wb_write_bit(wb, 1); + vp9_wb_write_bit(wb, 1); + vp9_wb_write_bit(wb, 0); + break; + default: + assert(0); + } } static void write_uncompressed_header(VP9_COMP *cpi, @@ -1063,16 +1082,15 @@ static void write_uncompressed_header(VP9_COMP *cpi, vp9_wb_write_literal(wb, cs, 3); if (cs != SRGB) { vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. 
xvYCC), 1: [0, 255] - if (cm->profile >= PROFILE_1) { + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { vp9_wb_write_bit(wb, cm->subsampling_x); vp9_wb_write_bit(wb, cm->subsampling_y); vp9_wb_write_bit(wb, 0); // has extra plane } } else { - assert(cm->profile == PROFILE_1); + assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3); vp9_wb_write_bit(wb, 0); // has extra plane } - write_frame_size(cm, wb); } else { if (!cm->show_frame) diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 178e30fbe..d5c308eb3 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -21,6 +21,7 @@ #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_read_bit_buffer.h" +#include "vp9/decoder/vp9_decodeframe.h" #include "vp9/vp9_iface_common.h" @@ -112,12 +113,11 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, { struct vp9_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL }; const int frame_marker = vp9_rb_read_literal(&rb, 2); - const int version = vp9_rb_read_bit(&rb); - (void) vp9_rb_read_bit(&rb); // unused version bit + const BITSTREAM_PROFILE profile = vp9_read_profile(&rb); if (frame_marker != VP9_FRAME_MARKER) return VPX_CODEC_UNSUP_BITSTREAM; - if (version > 1) return VPX_CODEC_UNSUP_BITSTREAM; + if (profile >= MAX_PROFILES) return VPX_CODEC_UNSUP_BITSTREAM; if (vp9_rb_read_bit(&rb)) { // show an existing frame return VPX_CODEC_OK; @@ -136,16 +136,17 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_2) { return VPX_CODEC_UNSUP_BITSTREAM; } - + if (profile > PROFILE_1) + rb.bit_offset += 1; // Bit-depth 10 or 12 colorspace = vp9_rb_read_literal(&rb, 3); if (colorspace != sRGB) { rb.bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range - if (version == 1) { + if (profile == PROFILE_1 || profile == PROFILE_3) { rb.bit_offset += 2; // subsampling x/y rb.bit_offset += 1; // has extra plane } } else { - if (version == 1) { + if (profile == 
PROFILE_1 || profile == PROFILE_3) { rb.bit_offset += 1; // has extra plane } else { // RGB is only available in version 1 diff --git a/vpxdec.c b/vpxdec.c index 546f98565..ab6ede260 100644 --- a/vpxdec.c +++ b/vpxdec.c @@ -84,9 +84,9 @@ static const arg_def_t fb_arg = static const arg_def_t md5arg = ARG_DEF( NULL, "md5", 0, "Compute the MD5 sum of the decoded frame"); #if CONFIG_VP9_HIGH -static const arg_def_t convertto8bitarg = ARG_DEF( - NULL, "convert-to-8bit", 0, - "Output 8-bit frames even for high bitdepth streams"); +static const arg_def_t outbitdeptharg = ARG_DEF( + NULL, "output-bit-depth", 1, + "Output bit-depth for decoded frames"); #endif static const arg_def_t *all_args[] = { @@ -96,7 +96,7 @@ static const arg_def_t *all_args[] = { &md5arg, &error_concealment, #if CONFIG_VP9_HIGH - &convertto8bitarg, + &outbitdeptharg, #endif NULL }; @@ -579,7 +579,7 @@ int main_loop(int argc, const char **argv_) { int use_y4m = 1; vpx_codec_dec_cfg_t cfg = {0}; #if CONFIG_VP9_HIGH - int convert_to_8bit = 0; + int out_bit_depth = 0; #endif #if CONFIG_VP8_DECODER vp8_postproc_cfg_t vp8_pp_cfg = {0}; @@ -662,8 +662,8 @@ int main_loop(int argc, const char **argv_) { num_external_frame_buffers = arg_parse_uint(&arg); #if CONFIG_VP9_HIGH - else if (arg_match(&arg, &convertto8bitarg, argi)) { - convert_to_8bit = 1; + else if (arg_match(&arg, &outbitdeptharg, argi)) { + out_bit_depth = arg_parse_uint(&arg); } #endif @@ -850,7 +850,6 @@ int main_loop(int argc, const char **argv_) { } #endif - if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip); while (arg_skip) { @@ -980,23 +979,21 @@ int main_loop(int argc, const char **argv_) { } } #if CONFIG_VP9_HIGH - if (convert_to_8bit) { - // Convert to an 8bit image - if (img->fmt & VPX_IMG_FMT_HIGH) { - unsigned int bit_depth; - unsigned int shift = 0; // BITS_8 default - if (vpx_codec_control(&decoder, VP9D_GET_BIT_DEPTH, &bit_depth)) { - // Fallback to 8bit - bit_depth = VPX_BITS_8; - } - switch (bit_depth) { - 
case VPX_BITS_10: // BITS_10 - shift = 2; - break; - case VPX_BITS_12: // BITS_12 - shift = 4; - break; - } + // Convert to an 8bit image + if (img->fmt & VPX_IMG_FMT_HIGH) { + vpx_bit_depth_t bit_depth; + if (vpx_codec_control(&decoder, VP9D_GET_BIT_DEPTH, &bit_depth)) { + // Fallback to 8bit + bit_depth = VPX_BITS_8; + } + if (out_bit_depth != 8 && + out_bit_depth != bit_depth * 2 + 8) { + fprintf(stderr, "Does not support bit-depth conversion to: %d.\n", + out_bit_depth); + return EXIT_FAILURE; + } + if (out_bit_depth == 8) { + unsigned int shift = (int)bit_depth * 2; if (!img_8bit) { img_8bit = vpx_img_alloc(NULL, img->fmt - VPX_IMG_FMT_HIGH, img->d_w, img->d_h, 16); diff --git a/vpxenc.c b/vpxenc.c index ea0df80af..4b0043e63 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -362,6 +362,13 @@ static const int vp8_arg_ctrl_map[] = { #endif #if CONFIG_VP9_ENCODER +static const struct arg_enum_list aq_mode_enum[] = { + {"off", 0}, + {"variance", 1}, + {"complexity", 2}, + {"cyclic", 3}, + {NULL, 0} +}; static const arg_def_t tile_cols = ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2"); static const arg_def_t tile_rows = @@ -369,10 +376,8 @@ static const arg_def_t tile_rows = static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode"); static const arg_def_t frame_parallel_decoding = ARG_DEF( NULL, "frame-parallel", 1, "Enable frame parallel decodability features"); -static const arg_def_t aq_mode = ARG_DEF( - NULL, "aq-mode", 1, - "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, " - "3: cyclic refresh)"); +static const arg_def_t aq_mode = ARG_DEF_ENUM( + NULL, "aq-mode", 1, "Adaptive quantization mode", aq_mode_enum); static const arg_def_t frame_periodic_boost = ARG_DEF( NULL, "frame_boost", 1, "Enable frame periodic boost (0: off (default), 1: on)"); @@ -385,12 +390,12 @@ static const struct arg_enum_list bitdepth_enum[] = { {NULL, 0} }; -static const arg_def_t bitdeptharg = ARG_DEF_ENUM(NULL, 
"bit-depth", 1, +static const arg_def_t bitdeptharg = ARG_DEF_ENUM("b", "bit-depth", 1, "Bit depth for codec " "(8 for version <=1, " "10 or 12 for version 2)", bitdepth_enum); -static const arg_def_t inbitdeptharg = ARG_DEF("b", "input-bit-depth", 1, +static const arg_def_t inbitdeptharg = ARG_DEF(NULL, "input-bit-depth", 1, "Bit depth of input"); #endif @@ -1161,7 +1166,7 @@ static void validate_stream_config(const struct stream_state *stream, experimental_bitstream.long_name); } - // Check that the stream bit depth is greater than the input bit depth + // Check that the codec bit depth is greater than the input bit depth if (stream->config.cfg.g_in_bit_depth > stream->config.cfg.g_bit_depth * 2 + 8) { fatal("Stream %d: input bit depth (%d) less than stream bit depth (%d)",