enable dc error-diffusion always

for q<=98, we always enable error diffusion. + reduce storage 2x by using int8_t + make the error diffusion more robust BUG=webp:340,308 Change-Id: I0608df839ff7b64d6843005a0f81d2577143af9e
2017-12-22 08:01:48 -08:00 · 2017-12-22 08:01:48 -08:00 · 815652de03
commit 815652de03
parent c2d04f3eb2
2 changed files with 29 additions and 24 deletions
--- a/src/enc/quant_enc.c
+++ b/src/enc/quant_enc.c
@ -829,11 +829,12 @@ static int ReconstructIntra4(VP8EncIterator* const it,
 //------------------------------------------------------------------------------
 // DC-error diffusion

-// Diffusion weights. We under-correct a bit (3/4th of the error is actually
+// Diffusion weights. We under-correct a bit (15/16th of the error is actually
 // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0.
-#define C1 2    // fraction of error sent to the 4x4 block below
-#define C2 1    // fraction of error sent to the 4x4 block on the right
-#define DSHIFT 2
+#define C1 7    // fraction of error sent to the 4x4 block below
+#define C2 8    // fraction of error sent to the 4x4 block on the right
+#define DSHIFT 4
+#define DSCALE 1   // storage descaling, needed to make the error fit int8_t

 // Quantize as usual, but also compute and return the quantization error.
 // Error is already divided by DSHIFT.
@ -845,10 +846,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
    const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0];
    const int err = (V - qV);
    *v = sign ? -qV : qV;
-    return (sign ? -err : err) >> DSHIFT;
+    return (sign ? -err : err) >> DSCALE;
  }
  *v = 0;
-  return (sign ? -V : V) >> DSHIFT;
+  return (sign ? -V : V) >> DSCALE;
 }

 static void CorrectDCValues(const VP8EncIterator* const it,
@ -863,21 +864,24 @@ static void CorrectDCValues(const VP8EncIterator* const it,
  // as top[]/left[] on the next block.
  int ch;
  for (ch = 0; ch <= 1; ++ch) {
-    const int16_t* const top = it->top_derr_[it->x_][ch];
-    const int16_t* const left = it->left_derr_[ch];
+    const int8_t* const top = it->top_derr_[it->x_][ch];
+    const int8_t* const left = it->left_derr_[ch];
    int16_t (* const c)[16] = &tmp[ch * 4];
    int err0, err1, err2, err3;
-    c[0][0] += C1 * top[0] + C2 * left[0];
+    c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
    err0 = QuantizeSingle(&c[0][0], mtx);
-    c[1][0] += C1 * top[1] + C2 * err0;
+    c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE);
    err1 = QuantizeSingle(&c[1][0], mtx);
-    c[2][0] += C1 * err0 + C2 * left[1];
+    c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE);
    err2 = QuantizeSingle(&c[2][0], mtx);
-    c[3][0] += C1 * err1 + C2 * err2;
+    c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
    err3 = QuantizeSingle(&c[3][0], mtx);
-    rd->derr[ch][0] = err1;
-    rd->derr[ch][1] = err2;
-    rd->derr[ch][2] = err3;
+    // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence
+    // err >> DSCALE will fit in an int8_t type if DSCALE>=1.
+    assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127);
+    rd->derr[ch][0] = (int8_t)err1;
+    rd->derr[ch][1] = (int8_t)err2;
+    rd->derr[ch][2] = (int8_t)err3;
  }
 }

@ -885,18 +889,19 @@ static void StoreDiffusionErrors(VP8EncIterator* const it,
                                 const VP8ModeScore* const rd) {
  int ch;
  for (ch = 0; ch <= 1; ++ch) {
-    int16_t* const top = it->top_derr_[it->x_][ch];
-    int16_t* const left = it->left_derr_[ch];
-    left[0] = rd->derr[ch][0];   // restore err1
-    left[1] = rd->derr[ch][2];   //     ... err3
-    top[0]  = rd->derr[ch][1];   //     ... err2
-    top[1]  = rd->derr[ch][2];   //     ... err3.
+    int8_t* const top = it->top_derr_[it->x_][ch];
+    int8_t* const left = it->left_derr_[ch];
+    left[0] = rd->derr[ch][0];            // restore err1
+    left[1] = 3 * rd->derr[ch][2] >> 2;   //     ... 3/4th of err3
+    top[0]  = rd->derr[ch][1];            //     ... err2
+    top[1]  = rd->derr[ch][2] - left[1];  //     ... 1/4th of err3.
  }
 }

 #undef C1
 #undef C2
 #undef DSHIFT
+#undef DSCALE

 //------------------------------------------------------------------------------

--- a/src/enc/vp8i_enc.h
+++ b/src/enc/vp8i_enc.h
@ -121,7 +121,7 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
 // #define DISABLE_TOKEN_BUFFER

 // quality below which error-diffusion is enabled
-#define ERROR_DIFFUSION_QUALITY 30
+#define ERROR_DIFFUSION_QUALITY 98

 //------------------------------------------------------------------------------
 // Headers
@ -204,7 +204,7 @@ typedef struct {
  score_t i4_penalty_;   // penalty for using Intra4
 } VP8SegmentInfo;

-typedef int16_t DError[2 /* u/v */][2 /* top or left */];
+typedef int8_t DError[2 /* u/v */][2 /* top or left */];

 // Handy transient struct to accumulate score and info during RD-optimization
 // and mode evaluation.
@ -218,7 +218,7 @@ typedef struct {
  uint8_t modes_i4[16];       // mode numbers for intra4 predictions
  int mode_uv;                // mode number of chroma prediction
  uint32_t nz;                // non-zero blocks
-  int16_t derr[2][3];         // DC diffusion errors for U/V for blocks #1/2/3
+  int8_t derr[2][3];          // DC diffusion errors for U/V for blocks #1/2/3
 } VP8ModeScore;

 // Iterator structure to iterate through macroblocks, pointing to the