diff --git a/src/enc/quant_enc.c b/src/enc/quant_enc.c index 67288a74..35bfaf21 100644 --- a/src/enc/quant_enc.c +++ b/src/enc/quant_enc.c @@ -829,11 +829,12 @@ static int ReconstructIntra4(VP8EncIterator* const it, //------------------------------------------------------------------------------ // DC-error diffusion -// Diffusion weights. We under-correct a bit (3/4th of the error is actually +// Diffusion weights. We under-correct a bit (15/16th of the error is actually // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. -#define C1 2 // fraction of error sent to the 4x4 block below -#define C2 1 // fraction of error sent to the 4x4 block on the right -#define DSHIFT 2 +#define C1 7 // fraction of error sent to the 4x4 block below +#define C2 8 // fraction of error sent to the 4x4 block on the right +#define DSHIFT 4 +#define DSCALE 1 // storage descaling, needed to make the error fit int8_t // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. @@ -845,10 +846,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0]; const int err = (V - qV); *v = sign ? -qV : qV; - return (sign ? -err : err) >> DSHIFT; + return (sign ? -err : err) >> DSCALE; } *v = 0; - return (sign ? -V : V) >> DSHIFT; + return (sign ? -V : V) >> DSCALE; } static void CorrectDCValues(const VP8EncIterator* const it, @@ -863,21 +864,24 @@ static void CorrectDCValues(const VP8EncIterator* const it, // as top[]/left[] on the next block. int ch; for (ch = 0; ch <= 1; ++ch) { - const int16_t* const top = it->top_derr_[it->x_][ch]; - const int16_t* const left = it->left_derr_[ch]; + const int8_t* const top = it->top_derr_[it->x_][ch]; + const int8_t* const left = it->left_derr_[ch]; int16_t (* const c)[16] = &tmp[ch * 4]; int err0, err1, err2, err3; - c[0][0] += C1 * top[0] + C2 * left[0]; + c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE); err0 = QuantizeSingle(&c[0][0], mtx); - c[1][0] += C1 * top[1] + C2 * err0; + c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE); err1 = QuantizeSingle(&c[1][0], mtx); - c[2][0] += C1 * err0 + C2 * left[1]; + c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE); err2 = QuantizeSingle(&c[2][0], mtx); - c[3][0] += C1 * err1 + C2 * err2; + c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE); err3 = QuantizeSingle(&c[3][0], mtx); - rd->derr[ch][0] = err1; - rd->derr[ch][1] = err2; - rd->derr[ch][2] = err3; + // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence + // err >> DSCALE will fit in an int8_t type if DSCALE>=1. + assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127); + rd->derr[ch][0] = (int8_t)err1; + rd->derr[ch][1] = (int8_t)err2; + rd->derr[ch][2] = (int8_t)err3; } } @@ -885,18 +889,19 @@ static void StoreDiffusionErrors(VP8EncIterator* const it, const VP8ModeScore* const rd) { int ch; for (ch = 0; ch <= 1; ++ch) { - int16_t* const top = it->top_derr_[it->x_][ch]; - int16_t* const left = it->left_derr_[ch]; - left[0] = rd->derr[ch][0]; // restore err1 - left[1] = rd->derr[ch][2]; // ... err3 - top[0] = rd->derr[ch][1]; // ... err2 - top[1] = rd->derr[ch][2]; // ... err3. + int8_t* const top = it->top_derr_[it->x_][ch]; + int8_t* const left = it->left_derr_[ch]; + left[0] = rd->derr[ch][0]; // restore err1 + left[1] = 3 * rd->derr[ch][2] >> 2; // ... 3/4th of err3 + top[0] = rd->derr[ch][1]; // ... err2 + top[1] = rd->derr[ch][2] - left[1]; // ... 1/4th of err3. } } #undef C1 #undef C2 #undef DSHIFT +#undef DSCALE //------------------------------------------------------------------------------ diff --git a/src/enc/vp8i_enc.h b/src/enc/vp8i_enc.h index d2fce941..11ff3f1d 100644 --- a/src/enc/vp8i_enc.h +++ b/src/enc/vp8i_enc.h @@ -121,7 +121,7 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { // #define DISABLE_TOKEN_BUFFER // quality below which error-diffusion is enabled -#define ERROR_DIFFUSION_QUALITY 30 +#define ERROR_DIFFUSION_QUALITY 98 //------------------------------------------------------------------------------ // Headers @@ -204,7 +204,7 @@ typedef struct { score_t i4_penalty_; // penalty for using Intra4 } VP8SegmentInfo; -typedef int16_t DError[2 /* u/v */][2 /* top or left */]; +typedef int8_t DError[2 /* u/v */][2 /* top or left */]; // Handy transient struct to accumulate score and info during RD-optimization // and mode evaluation. @@ -218,7 +218,7 @@ typedef struct { uint8_t modes_i4[16]; // mode numbers for intra4 predictions int mode_uv; // mode number of chroma prediction uint32_t nz; // non-zero blocks - int16_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3 + int8_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3 } VP8ModeScore; // Iterator structure to iterate through macroblocks, pointing to the