From f4710e3b89cc76ef2bb3620e04867c97a8ec0c89 Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Fri, 31 May 2013 22:38:12 +0200
Subject: [PATCH] collect macroblock reconstruction data in VP8MBData struct

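Move the per-macroblock fields coeffs_, is_i4x4_, imodes_, uvmode_,
non_zero_ and non_zero_ac_ out of VP8Decoder into a new VP8MBData
struct, reached through dec->mb_data_. segment_ stays in VP8Decoder.
This is to better separate bitstream parsing from reconstruction.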

Change-Id: I872b58e9940c4b14f72ebee50fba545468ff754c
---
 src/dec/frame.c | 45 +++++++++++++++++++++++----------------------
 src/dec/tree.c  | 16 +++++++++-------
 src/dec/vp8.c   | 22 ++++++++++++----------
 src/dec/vp8i.h  | 28 ++++++++++++++++------------
 4 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/src/dec/frame.c b/src/dec/frame.c
index 316dde79..aecb0e2f 100644
--- a/src/dec/frame.c
+++ b/src/dec/frame.c
@@ -417,7 +417,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
           mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
         : 0;
   const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
-  const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
+  const size_t mb_data_size = sizeof(*dec->mb_data_);
   const size_t cache_height = (16 * num_caches
                             + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
   const size_t cache_size = top_size * cache_height;
@@ -426,7 +426,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
       (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
   const uint64_t needed = (uint64_t)intra_pred_mode_size
                         + top_size + mb_info_size + f_info_size
-                        + yuv_size + coeffs_size
+                        + yuv_size + mb_data_size
                         + cache_size + alpha_size + ALIGN_MASK;
   uint8_t* mem;
 
@@ -473,8 +473,8 @@ static int AllocateMemory(VP8Decoder* const dec) {
   dec->yuv_b_ = (uint8_t*)mem;
   mem += yuv_size;
 
-  dec->coeffs_ = (int16_t*)mem;
-  mem += coeffs_size;
+  dec->mb_data_ = (VP8MBData*)mem;
+  mem += mb_data_size;
 
   dec->cache_y_stride_ = 16 * mb_w;
   dec->cache_uv_stride_ = 8 * mb_w;
@@ -554,6 +554,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
   uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
   uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
   uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
+  const VP8MBData* const block = dec->mb_data_;
 
   // Rotate in the left samples from previously decoded block. We move four
   // pixels at a time for alignment reason, and because of in-loop filter.
@@ -583,7 +584,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
     uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
     uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
     uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
-    const int16_t* const coeffs = dec->coeffs_;
+    const int16_t* const coeffs = block->coeffs_;
     int n;
 
     if (dec->mb_y_ > 0) {
@@ -599,8 +600,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
     }
 
     // predict and add residuals
-
-    if (dec->is_i4x4_) {   // 4x4
+    if (block->is_i4x4_) {   // 4x4
       uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
 
       if (dec->mb_y_ > 0) {
@@ -613,25 +613,26 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
       // replicate the top-right pixels below
       top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
 
-      // predict and add residues for all 4x4 blocks in turn.
+      // predict and add residuals for all 4x4 blocks in turn.
       for (n = 0; n < 16; n++) {
         uint8_t* const dst = y_dst + kScan[n];
-        VP8PredLuma4[dec->imodes_[n]](dst);
-        if (dec->non_zero_ac_ & (1 << n)) {
+        VP8PredLuma4[block->imodes_[n]](dst);
+        if (block->non_zero_ac_ & (1 << n)) {
           VP8Transform(coeffs + n * 16, dst, 0);
-        } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
+        } else if (block->non_zero_ & (1 << n)) {  // only DC is present
           VP8TransformDC(coeffs + n * 16, dst);
         }
       }
     } else {    // 16x16
-      const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, dec->imodes_[0]);
+      const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_,
+                                      block->imodes_[0]);
       VP8PredLuma16[pred_func](y_dst);
-      if (dec->non_zero_ & 0xffff) {
+      if (block->non_zero_ & 0xffff) {
         for (n = 0; n < 16; n++) {
           uint8_t* const dst = y_dst + kScan[n];
-          if (dec->non_zero_ac_ & (1 << n)) {
+          if (block->non_zero_ac_ & (1 << n)) {
             VP8Transform(coeffs + n * 16, dst, 0);
-          } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
+          } else if (block->non_zero_ & (1 << n)) {  // only DC is present
             VP8TransformDC(coeffs + n * 16, dst);
           }
         }
@@ -639,21 +640,21 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
     }
     {
       // Chroma
-      const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, dec->uvmode_);
+      const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, block->uvmode_);
       VP8PredChroma8[pred_func](u_dst);
       VP8PredChroma8[pred_func](v_dst);
 
-      if (dec->non_zero_ & 0x0f0000) {   // chroma-U
-        const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
-        if (dec->non_zero_ac_ & 0x0f0000) {
+      if (block->non_zero_ & 0x0f0000) {   // chroma-U
+        const int16_t* const u_coeffs = coeffs + 16 * 16;
+        if (block->non_zero_ac_ & 0x0f0000) {
           VP8TransformUV(u_coeffs, u_dst);
         } else {
           VP8TransformDCUV(u_coeffs, u_dst);
         }
       }
-      if (dec->non_zero_ & 0xf00000) {   // chroma-V
-        const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
-        if (dec->non_zero_ac_ & 0xf00000) {
+      if (block->non_zero_ & 0xf00000) {   // chroma-V
+        const int16_t* const v_coeffs = coeffs + 20 * 16;
+        if (block->non_zero_ac_ & 0xf00000) {
           VP8TransformUV(v_coeffs, v_dst);
         } else {
           VP8TransformDCUV(v_coeffs, v_dst);
diff --git a/src/dec/tree.c b/src/dec/tree.c
index 82484e4c..1fa99a4d 100644
--- a/src/dec/tree.c
+++ b/src/dec/tree.c
@@ -337,17 +337,19 @@ void VP8ResetProba(VP8Proba* const proba) {
 void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec) {
   uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_;
   uint8_t* const left = dec->intra_l_;
+  VP8MBData* const block = dec->mb_data_;
+
   // Hardcoded 16x16 intra-mode decision tree.
-  dec->is_i4x4_ = !VP8GetBit(br, 145);   // decide for B_PRED first
-  if (!dec->is_i4x4_) {
+  block->is_i4x4_ = !VP8GetBit(br, 145);   // decide for B_PRED first
+  if (!block->is_i4x4_) {
     const int ymode =
         VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
                            : (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
-    dec->imodes_[0] = ymode;
+    block->imodes_[0] = ymode;
     memset(top, ymode, 4 * sizeof(top[0]));
     memset(left, ymode, 4 * sizeof(left[0]));
   } else {
-    uint8_t* modes = dec->imodes_;
+    uint8_t* modes = block->imodes_;
     int y;
     for (y = 0; y < 4; ++y) {
       int ymode = left[y];
@@ -380,9 +382,9 @@ void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec) {
     }
   }
   // Hardcoded UVMode decision tree
-  dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
-               : !VP8GetBit(br, 114) ? V_PRED
-               : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
+  block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
+                 : !VP8GetBit(br, 114) ? V_PRED
+                 : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/dec/vp8.c b/src/dec/vp8.c
index e8dd6977..168d6f72 100644
--- a/src/dec/vp8.c
+++ b/src/dec/vp8.c
@@ -530,7 +530,8 @@ static int ParseResiduals(VP8Decoder* const dec,
   int first;
   ProbaArray ac_prob;
   const VP8QuantMatrix* const q = &dec->dqm_[dec->segment_];
-  int16_t* dst = dec->coeffs_;
+  VP8MBData* const block = dec->mb_data_;
+  int16_t* dst = block->coeffs_;
   VP8MB* const left_mb = dec->mb_info_ - 1;
   PackedNz nz_ac, nz_dc;
   PackedNz tnz, lnz;
@@ -539,7 +540,7 @@ static int ParseResiduals(VP8Decoder* const dec,
   int x, y, ch;
 
   memset(dst, 0, 384 * sizeof(*dst));
-  if (!dec->is_i4x4_) {    // parse DC
+  if (!block->is_i4x4_) {    // parse DC
     int16_t dc[16] = { 0 };
     const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
     mb->nz_dc_ = left_mb->nz_dc_ =
@@ -598,9 +599,9 @@ static int ParseResiduals(VP8Decoder* const dec,
   mb->nz_ = out_t_nz;
   left_mb->nz_ = out_l_nz;
 
-  dec->non_zero_ac_ = non_zero_ac;
-  dec->non_zero_ = non_zero_ac | non_zero_dc;
-  return !dec->non_zero_;   // will be used for further optimization
+  block->non_zero_ac_ = non_zero_ac;
+  block->non_zero_ = non_zero_ac | non_zero_dc;
+  return !block->non_zero_;   // will be used for further optimization
 }
 #undef PACK
 
@@ -611,6 +612,7 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
   VP8BitReader* const br = &dec->br_;
   VP8MB* const left = dec->mb_info_ - 1;
   VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
+  VP8MBData* const block = dec->mb_data_;
   int skip;
 
   // Note: we don't save segment map (yet), as we don't expect
@@ -632,17 +634,17 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
     skip = ParseResiduals(dec, mb, token_br);
   } else {
     left->nz_ = mb->nz_ = 0;
-    if (!dec->is_i4x4_) {
+    if (!block->is_i4x4_) {
       left->nz_dc_ = mb->nz_dc_ = 0;
     }
-    dec->non_zero_ = 0;
-    dec->non_zero_ac_ = 0;
+    block->non_zero_ = 0;
+    block->non_zero_ac_ = 0;
   }
 
   if (dec->filter_type_ > 0) {  // store filter info
     VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
-    *finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
-    finfo->f_inner_ = !skip || dec->is_i4x4_;
+    *finfo = dec->fstrengths_[dec->segment_][block->is_i4x4_];
+    finfo->f_inner_ = !skip || block->is_i4x4_;
   }
 
   return !token_br->eof_;
diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h
index 6cb47390..56116a1f 100644
--- a/src/dec/vp8i.h
+++ b/src/dec/vp8i.h
@@ -168,6 +168,20 @@ typedef struct {
   quant_t y1_mat_, y2_mat_, uv_mat_;
 } VP8QuantMatrix;
 
+// Data needed to reconstruct a macroblock, filled in by the bitstream parser.
+typedef struct {
+  int16_t coeffs_[384];   // (16 Y + 4 U + 4 V) 4x4 blocks, 16 coeffs each
+  uint8_t is_i4x4_;       // true if intra4x4
+  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
+  uint8_t uvmode_;        // chroma prediction mode
+  // bit-wise info about the content of each sub-4x4 block: there are 16 bits
+  // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
+  // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
+  // If the bit is set, the 4x4 block contains some non-zero coefficients.
+  uint32_t non_zero_;     // union of the AC and DC non-zero bits
+  uint32_t non_zero_ac_;  // same layout: set if AC coeffs are present
+} VP8MBData;
+
 // Persistent information needed by the parallel processing
 typedef struct {
   int id_;            // cache row to process (in [0..2])
@@ -238,11 +252,11 @@ struct VP8Decoder {
   uint8_t  intra_l_[4];  // left intra modes values
   uint8_t* y_t_;         // top luma samples: 16 * mb_w_
   uint8_t* u_t_, *v_t_;  // top u/v samples: 8 * mb_w_ each
+  uint8_t segment_;      // segment of the currently parsed block
 
   VP8MB* mb_info_;       // contextual macroblock info (mb_w_ + 1)
   VP8FInfo* f_info_;     // filter strength info
   uint8_t* yuv_b_;       // main block for Y/U/V (size = YUV_SIZE)
-  int16_t* coeffs_;      // 384 coeffs = (16+8+8) * 4*4
 
   uint8_t* cache_y_;     // macroblock row for storing unfiltered samples
   uint8_t* cache_u_;
@@ -256,17 +270,7 @@ struct VP8Decoder {
 
   // Per macroblock non-persistent infos.
   int mb_x_, mb_y_;       // current position, in macroblock units
-  uint8_t is_i4x4_;       // true if intra4x4
-  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
-  uint8_t uvmode_;        // chroma prediction mode
-  uint8_t segment_;       // block's segment
-
-  // bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
-  // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
-  // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
-  // If the bit is set, the 4x4 block contains some non-zero coefficients.
-  uint32_t non_zero_;
-  uint32_t non_zero_ac_;
+  VP8MBData* mb_data_;    // reconstruction data
 
   // Filtering side-info
   int filter_type_;                          // 0=off, 1=simple, 2=complex