From f4710e3b89cc76ef2bb3620e04867c97a8ec0c89 Mon Sep 17 00:00:00 2001 From: skal Date: Fri, 31 May 2013 22:38:12 +0200 Subject: [PATCH] collect macroblock reconstruction data in VP8MBData struct This is to better separate bitstream parsing from reconstruction. Change-Id: I872b58e9940c4b14f72ebee50fba545468ff754c --- src/dec/frame.c | 45 +++++++++++++++++++++++---------------------- src/dec/tree.c | 16 +++++++++------- src/dec/vp8.c | 22 ++++++++++++---------- src/dec/vp8i.h | 28 ++++++++++++++++------------ 4 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/dec/frame.c b/src/dec/frame.c index 316dde79..aecb0e2f 100644 --- a/src/dec/frame.c +++ b/src/dec/frame.c @@ -417,7 +417,7 @@ static int AllocateMemory(VP8Decoder* const dec) { mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo) : 0; const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); - const size_t coeffs_size = 384 * sizeof(*dec->coeffs_); + const size_t mb_data_size = sizeof(*dec->mb_data_); const size_t cache_height = (16 * num_caches + kFilterExtraRows[dec->filter_type_]) * 3 / 2; const size_t cache_size = top_size * cache_height; @@ -426,7 +426,7 @@ static int AllocateMemory(VP8Decoder* const dec) { (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL; const uint64_t needed = (uint64_t)intra_pred_mode_size + top_size + mb_info_size + f_info_size - + yuv_size + coeffs_size + + yuv_size + mb_data_size + cache_size + alpha_size + ALIGN_MASK; uint8_t* mem; @@ -473,8 +473,8 @@ static int AllocateMemory(VP8Decoder* const dec) { dec->yuv_b_ = (uint8_t*)mem; mem += yuv_size; - dec->coeffs_ = (int16_t*)mem; - mem += coeffs_size; + dec->mb_data_ = (VP8MBData*)mem; + mem += mb_data_size; dec->cache_y_stride_ = 16 * mb_w; dec->cache_uv_stride_ = 8 * mb_w; @@ -554,6 +554,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) { uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; uint8_t* const u_dst = dec->yuv_b_ + U_OFF; uint8_t* const v_dst = dec->yuv_b_ + V_OFF; + const VP8MBData* const block = dec->mb_data_; // Rotate in the left samples from previously decoded block. We move four // pixels at a time for alignment reason, and because of in-loop filter. @@ -583,7 +584,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) { uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16; uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8; uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8; - const int16_t* const coeffs = dec->coeffs_; + const int16_t* const coeffs = block->coeffs_; int n; if (dec->mb_y_ > 0) { @@ -599,8 +600,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) { } // predict and add residuals - - if (dec->is_i4x4_) { // 4x4 + if (block->is_i4x4_) { // 4x4 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); if (dec->mb_y_ > 0) { @@ -613,25 +613,26 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) { // replicate the top-right pixels below top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; - // predict and add residues for all 4x4 blocks in turn. + // predict and add residuals for all 4x4 blocks in turn. for (n = 0; n < 16; n++) { uint8_t* const dst = y_dst + kScan[n]; - VP8PredLuma4[dec->imodes_[n]](dst); - if (dec->non_zero_ac_ & (1 << n)) { + VP8PredLuma4[block->imodes_[n]](dst); + if (block->non_zero_ac_ & (1 << n)) { VP8Transform(coeffs + n * 16, dst, 0); - } else if (dec->non_zero_ & (1 << n)) { // only DC is present + } else if (block->non_zero_ & (1 << n)) { // only DC is present VP8TransformDC(coeffs + n * 16, dst); } } } else { // 16x16 - const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, dec->imodes_[0]); + const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, + block->imodes_[0]); VP8PredLuma16[pred_func](y_dst); - if (dec->non_zero_ & 0xffff) { + if (block->non_zero_ & 0xffff) { for (n = 0; n < 16; n++) { uint8_t* const dst = y_dst + kScan[n]; - if (dec->non_zero_ac_ & (1 << n)) { + if (block->non_zero_ac_ & (1 << n)) { VP8Transform(coeffs + n * 16, dst, 0); - } else if (dec->non_zero_ & (1 << n)) { // only DC is present + } else if (block->non_zero_ & (1 << n)) { // only DC is present VP8TransformDC(coeffs + n * 16, dst); } } @@ -639,21 +640,21 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) { } { // Chroma - const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, dec->uvmode_); + const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, block->uvmode_); VP8PredChroma8[pred_func](u_dst); VP8PredChroma8[pred_func](v_dst); - if (dec->non_zero_ & 0x0f0000) { // chroma-U - const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16; - if (dec->non_zero_ac_ & 0x0f0000) { + if (block->non_zero_ & 0x0f0000) { // chroma-U + const int16_t* const u_coeffs = coeffs + 16 * 16; + if (block->non_zero_ac_ & 0x0f0000) { VP8TransformUV(u_coeffs, u_dst); } else { VP8TransformDCUV(u_coeffs, u_dst); } } - if (dec->non_zero_ & 0xf00000) { // chroma-V - const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16; - if (dec->non_zero_ac_ & 0xf00000) { + if (block->non_zero_ & 0xf00000) { // chroma-V + const int16_t* const v_coeffs = coeffs + 20 * 16; + if (block->non_zero_ac_ & 0xf00000) { VP8TransformUV(v_coeffs, v_dst); } else { VP8TransformDCUV(v_coeffs, v_dst); diff --git a/src/dec/tree.c b/src/dec/tree.c index 82484e4c..1fa99a4d 100644 --- a/src/dec/tree.c +++ b/src/dec/tree.c @@ -337,17 +337,19 @@ void VP8ResetProba(VP8Proba* const proba) { void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) { uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_; uint8_t* const left = dec->intra_l_; + VP8MBData* const block = dec->mb_data_; + // Hardcoded 16x16 intra-mode decision tree. - dec->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first - if (!dec->is_i4x4_) { + block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first + if (!block->is_i4x4_) { const int ymode = VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED) : (VP8GetBit(br, 163) ? V_PRED : DC_PRED); - dec->imodes_[0] = ymode; + block->imodes_[0] = ymode; memset(top, ymode, 4 * sizeof(top[0])); memset(left, ymode, 4 * sizeof(left[0])); } else { - uint8_t* modes = dec->imodes_; + uint8_t* modes = block->imodes_; int y; for (y = 0; y < 4; ++y) { int ymode = left[y]; @@ -380,9 +382,9 @@ void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) { } } // Hardcoded UVMode decision tree - dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED - : !VP8GetBit(br, 114) ? V_PRED - : VP8GetBit(br, 183) ? TM_PRED : H_PRED; + block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED + : !VP8GetBit(br, 114) ? V_PRED + : VP8GetBit(br, 183) ? TM_PRED : H_PRED; } //------------------------------------------------------------------------------ diff --git a/src/dec/vp8.c b/src/dec/vp8.c index e8dd6977..168d6f72 100644 --- a/src/dec/vp8.c +++ b/src/dec/vp8.c @@ -530,7 +530,8 @@ static int ParseResiduals(VP8Decoder* const dec, int first; ProbaArray ac_prob; const VP8QuantMatrix* const q = &dec->dqm_[dec->segment_]; - int16_t* dst = dec->coeffs_; + VP8MBData* const block = dec->mb_data_; + int16_t* dst = block->coeffs_; VP8MB* const left_mb = dec->mb_info_ - 1; PackedNz nz_ac, nz_dc; PackedNz tnz, lnz; @@ -539,7 +540,7 @@ static int ParseResiduals(VP8Decoder* const dec, int x, y, ch; memset(dst, 0, 384 * sizeof(*dst)); - if (!dec->is_i4x4_) { // parse DC + if (!block->is_i4x4_) { // parse DC int16_t dc[16] = { 0 }; const int ctx = mb->nz_dc_ + left_mb->nz_dc_; mb->nz_dc_ = left_mb->nz_dc_ = @@ -598,9 +599,9 @@ static int ParseResiduals(VP8Decoder* const dec, mb->nz_ = out_t_nz; left_mb->nz_ = out_l_nz; - dec->non_zero_ac_ = non_zero_ac; - dec->non_zero_ = non_zero_ac | non_zero_dc; - return !dec->non_zero_; // will be used for further optimization + block->non_zero_ac_ = non_zero_ac; + block->non_zero_ = non_zero_ac | non_zero_dc; + return !block->non_zero_; // will be used for further optimization } #undef PACK @@ -611,6 +612,7 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { VP8BitReader* const br = &dec->br_; VP8MB* const left = dec->mb_info_ - 1; VP8MB* const mb = dec->mb_info_ + dec->mb_x_; + VP8MBData* const block = dec->mb_data_; int skip; // Note: we don't save segment map (yet), as we don't expect @@ -632,17 +634,17 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { skip = ParseResiduals(dec, mb, token_br); } else { left->nz_ = mb->nz_ = 0; - if (!dec->is_i4x4_) { + if (!block->is_i4x4_) { left->nz_dc_ = mb->nz_dc_ = 0; } - dec->non_zero_ = 0; - dec->non_zero_ac_ = 0; + block->non_zero_ = 0; + block->non_zero_ac_ = 0; } if (dec->filter_type_ > 0) { // store filter info VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_; - *finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_]; - finfo->f_inner_ = !skip || dec->is_i4x4_; + *finfo = dec->fstrengths_[dec->segment_][block->is_i4x4_]; + finfo->f_inner_ = !skip || block->is_i4x4_; } return !token_br->eof_; diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h index 6cb47390..56116a1f 100644 --- a/src/dec/vp8i.h +++ b/src/dec/vp8i.h @@ -168,6 +168,20 @@ typedef struct { quant_t y1_mat_, y2_mat_, uv_mat_; } VP8QuantMatrix; +// Data needed to reconstruct a macroblock +typedef struct { + int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4 + uint8_t is_i4x4_; // true if intra4x4 + uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes + uint8_t uvmode_; // chroma prediction mode + // bit-wise info about the content of each sub-4x4 blocks: there are 16 bits + // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for + // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order. + // If the bit is set, the 4x4 block contains some non-zero coefficients. + uint32_t non_zero_; + uint32_t non_zero_ac_; +} VP8MBData; + // Persistent information needed by the parallel processing typedef struct { int id_; // cache row to process (in [0..2]) @@ -238,11 +252,11 @@ struct VP8Decoder { uint8_t intra_l_[4]; // left intra modes values uint8_t* y_t_; // top luma samples: 16 * mb_w_ uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each + uint8_t segment_; // segment of the currently parsed block VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1) VP8FInfo* f_info_; // filter strength info uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE) - int16_t* coeffs_; // 384 coeffs = (16+8+8) * 4*4 uint8_t* cache_y_; // macroblock row for storing unfiltered samples uint8_t* cache_u_; @@ -256,17 +270,7 @@ struct VP8Decoder { // Per macroblock non-persistent infos. int mb_x_, mb_y_; // current position, in macroblock units - uint8_t is_i4x4_; // true if intra4x4 - uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes - uint8_t uvmode_; // chroma prediction mode - uint8_t segment_; // block's segment - - // bit-wise info about the content of each sub-4x4 blocks: there are 16 bits - // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for - // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order. - // If the bit is set, the 4x4 block contains some non-zero coefficients. - uint32_t non_zero_; - uint32_t non_zero_ac_; + VP8MBData* mb_data_; // reconstruction data // Filtering side-info int filter_type_; // 0=off, 1=simple, 2=complex