diff --git a/examples/dwebp.c b/examples/dwebp.c index eb40b747..36a95b6f 100644 --- a/examples/dwebp.c +++ b/examples/dwebp.c @@ -555,6 +555,8 @@ static void Help(void) { " -version .... print version number and exit.\n" " -nofancy ..... don't use the fancy YUV420 upscaler.\n" " -nofilter .... disable in-loop filtering.\n" + " -nodither .... disable dithering.\n" + " -dither .. dithering strength (in 0..100)\n" " -mt .......... use multi-threading\n" " -crop ... crop output with the given rectangle\n" " -scale .......... scale the output (*after* any cropping)\n" @@ -625,6 +627,10 @@ int main(int argc, const char *argv[]) { format = YUV; } else if (!strcmp(argv[c], "-mt")) { config.options.use_threads = 1; + } else if (!strcmp(argv[c], "-nodither")) { + config.options.dithering_strength = 0; + } else if (!strcmp(argv[c], "-dither") && c < argc - 1) { + config.options.dithering_strength = strtol(argv[++c], NULL, 0); } else if (!strcmp(argv[c], "-crop") && c < argc - 4) { config.options.use_cropping = 1; config.options.crop_left = strtol(argv[++c], NULL, 0); @@ -719,7 +725,7 @@ int main(int argc, const char *argv[]) { if (!incremental) { status = WebPDecode(data, data_size, &config); } else { - WebPIDecoder* const idec = WebPINewDecoder(output_buffer); + WebPIDecoder* const idec = WebPIDecode(data, data_size, &config); if (idec == NULL) { fprintf(stderr, "Failed during WebPINewDecoder().\n"); status = VP8_STATUS_OUT_OF_MEMORY; diff --git a/examples/vwebp.c b/examples/vwebp.c index b0f0771d..da08135e 100644 --- a/examples/vwebp.c +++ b/examples/vwebp.c @@ -376,6 +376,7 @@ static void Help(void) { " -noicc ....... don't use the icc profile if present.\n" " -nofancy ..... don't use the fancy YUV420 upscaler.\n" " -nofilter .... disable in-loop filtering.\n" + " -dither dithering strength (0..100). Default=50.\n" " -mt .......... use multi-threading.\n" " -info ........ print info.\n" " -h ....... 
this help message.\n" @@ -397,6 +398,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Library version mismatch!\n"); return -1; } + config->options.dithering_strength = 50; kParams.use_color_profile = 1; for (c = 1; c < argc; ++c) { @@ -409,6 +411,8 @@ int main(int argc, char *argv[]) { config->options.no_fancy_upsampling = 1; } else if (!strcmp(argv[c], "-nofilter")) { config->options.bypass_filtering = 1; + } else if (!strcmp(argv[c], "-dither") && c + 1 < argc) { + config->options.dithering_strength = strtol(argv[++c], NULL, 0); } else if (!strcmp(argv[c], "-info")) { kParams.print_info = 1; } else if (!strcmp(argv[c], "-version")) { diff --git a/man/dwebp.1 b/man/dwebp.1 index 4426aedc..9a616763 100644 --- a/man/dwebp.1 +++ b/man/dwebp.1 @@ -1,5 +1,5 @@ .\" Hey, EMACS: -*- nroff -*- -.TH DWEBP 1 "May 10, 2013" +.TH DWEBP 1 "November 26, 2013" .SH NAME dwebp \- decompress a WebP file to an image file .SH SYNOPSIS @@ -55,7 +55,15 @@ edges (especially the red ones), but should be faster. .B \-nofilter Don't use the in-loop filtering process even if it is required by the bitstream. This may produce visible blocks on the non-compliant output, -but will make the decoding faster. +but it will make the decoding faster. +.TP +.B \-dither " strength +Specify a dithering \fBstrength\fP between 0 and 100. Dithering is a +post-processing effect applied to chroma components in lossy compression. +It helps by smoothing gradients and avoiding banding artifacts. +.TP +.B \-nodither +Disable all dithering (default). .TP .B \-mt Use multi-threading for decoding, if possible. 
diff --git a/src/dec/frame.c b/src/dec/frame.c index 88eb3c87..30248049 100644 --- a/src/dec/frame.c +++ b/src/dec/frame.c @@ -148,6 +148,82 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) { } } +//------------------------------------------------------------------------------ +// Dithering + +#define DITHER_AMP_TAB_SIZE 12 +static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = { + // roughly, it's dqm->uv_mat_[1] + 8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1 +}; + +void VP8InitDithering(const WebPDecoderOptions* const options, + VP8Decoder* const dec) { + assert(dec != NULL); + if (options != NULL) { + const int d = options->dithering_strength; + const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1; + const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100); + if (f > 0) { + int s; + int all_amp = 0; + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + VP8QuantMatrix* const dqm = &dec->dqm_[s]; + if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) { + // TODO(skal): should we specially dither more for uv_quant_ < 0? + const int idx = (dqm->uv_quant_ < 0) ? 
0 : dqm->uv_quant_; + dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; + } + all_amp |= dqm->dither_; + } + if (all_amp != 0) { + VP8InitRandom(&dec->dithering_rg_, 1.0f); + dec->dither_ = 1; + } + } + } +} + +// minimal amp that will provide a non-zero dithering effect +#define MIN_DITHER_AMP 4 +#define DITHER_DESCALE 4 +#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) +#define DITHER_AMP_BITS 8 +#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) + +static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) { + int i, j; + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) { + // TODO: could be made faster with SSE2 + const int bits = + VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER; + // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100 + const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE; + const int v = (int)dst[i] + delta; + dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v; + } + dst += bps; + } +} + +static void DitherRow(VP8Decoder* const dec) { + int mb_x; + assert(dec->dither_); + for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { + const VP8ThreadContext* const ctx = &dec->thread_ctx_; + const VP8MBData* const data = ctx->mb_data_ + mb_x; + const int cache_id = ctx->id_; + const int uv_bps = dec->cache_uv_stride_; + if (data->dither_ >= MIN_DITHER_AMP) { + uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8; + uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8; + Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_); + Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_); + } + } +} + //------------------------------------------------------------------------------ // This function is called after a row of macroblocks is finished decoding. 
// It also takes into account the following restrictions: @@ -186,6 +262,10 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { FilterRow(dec); } + if (dec->dither_) { + DitherRow(dec); + } + if (io->put != NULL) { int y_start = MACROBLOCK_VPOS(mb_y); int y_end = MACROBLOCK_VPOS(mb_y + 1); diff --git a/src/dec/idec.c b/src/dec/idec.c index 78562e3e..a9b8acc8 100644 --- a/src/dec/idec.c +++ b/src/dec/idec.c @@ -423,6 +423,7 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) { // This change must be done before calling VP8InitFrame() dec->mt_method_ = VP8GetThreadMethod(params->options, NULL, io->width, io->height); + VP8InitDithering(params->options, dec); if (!CopyParts0Data(idec)) { return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY); } diff --git a/src/dec/quant.c b/src/dec/quant.c index a4cc693d..fea6c530 100644 --- a/src/dec/quant.c +++ b/src/dec/quant.c @@ -104,6 +104,8 @@ void VP8ParseQuant(VP8Decoder* const dec) { m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)]; m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)]; + + m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation } } } diff --git a/src/dec/vp8.c b/src/dec/vp8.c index 3c17ae51..d2f4cfb4 100644 --- a/src/dec/vp8.c +++ b/src/dec/vp8.c @@ -561,6 +561,12 @@ static int ParseResiduals(VP8Decoder* const dec, block->non_zero_y_ = non_zero_y; block->non_zero_uv_ = non_zero_uv; + + // We look at the mode-code of each block and check if some blocks have less + // than three non-zero coeffs (code < 2). This is to avoid dithering flat and + // empty blocks. + block->dither_ = (non_zero_uv & 0xaaaa) ? 
0 : q->dither_; + return !(non_zero_y | non_zero_uv); // will be used for further optimization } diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h index abd765c9..bb438626 100644 --- a/src/dec/vp8i.h +++ b/src/dec/vp8i.h @@ -17,6 +17,7 @@ #include <string.h> // for memcpy() #include "./vp8li.h" #include "../utils/bit_reader.h" +#include "../utils/random.h" #include "../utils/thread.h" #include "../dsp/dsp.h" @@ -173,6 +174,9 @@ typedef struct { // Top/Left Contexts used for syntax-parsing typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower). typedef struct { quant_t y1_mat_, y2_mat_, uv_mat_; + + int uv_quant_; // U/V quantizer value + int dither_; // dithering amplitude (0 = off, max=255) } VP8QuantMatrix; // Data needed to reconstruct a macroblock @@ -190,6 +194,7 @@ typedef struct { // This allows to call specialized transform functions. uint32_t non_zero_y_; uint32_t non_zero_uv_; + uint8_t dither_; // local dithering strength (deduced from non_zero_*) } VP8MBData; // Persistent information needed by the parallel processing @@ -244,6 +249,10 @@ struct VP8Decoder { // per-partition boolean decoders. VP8BitReader parts_[MAX_NUM_PARTITIONS]; + // Dithering strength, deduced from decoding options + int dither_; // whether to use dithering or not + VP8Random dithering_rg_; // random generator for dithering + // dequantization (one set of DC/AC dequant factor per segment) VP8QuantMatrix dqm_[NUM_MB_SEGMENTS]; @@ -324,7 +333,10 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); int VP8GetThreadMethod(const WebPDecoderOptions* const options, const WebPHeaderStructure* const headers, int width, int height); -// Process the last decoded row (filtering + output) +// Initialize dithering post-process if needed. +void VP8InitDithering(const WebPDecoderOptions* const options, + VP8Decoder* const dec); +// Process the last decoded row (filtering + output). 
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); // To be called at the start of a new scanline, to initialize predictors. void VP8InitScanline(VP8Decoder* const dec); diff --git a/src/dec/webp.c b/src/dec/webp.c index 21d16070..08f8bb77 100644 --- a/src/dec/webp.c +++ b/src/dec/webp.c @@ -474,6 +474,7 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, // This change must be done before calling VP8Decode() dec->mt_method_ = VP8GetThreadMethod(params->options, &headers, io.width, io.height); + VP8InitDithering(params->options, dec); if (!VP8Decode(dec, &io)) { status = dec->status_; } diff --git a/src/utils/random.h b/src/utils/random.h index 9a755eb9..32632d3d 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -34,8 +34,10 @@ typedef struct { void VP8InitRandom(VP8Random* const rg, float dithering); // Returns a centered pseudo-random number with 'num_bits' amplitude. -// (uses D.Knuth's Difference-based random generator) -static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { +// (uses D.Knuth's Difference-based random generator). +// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision. 
+static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits, + int amp) { int diff; assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31); diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_]; @@ -43,12 +45,16 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { rg->tab_[rg->index1_] = diff; if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0; if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0; - diff = (diff << 1) >> (32 - num_bits); // sign-extend, 0-center - diff = (diff * rg->amp_) >> VP8_RANDOM_DITHER_FIX; // restrict range - diff += 1 << (num_bits - 1); // shift back to 0.5-center + diff = (diff << 1) >> (32 - num_bits); // sign-extend, 0-center + diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range + diff += 1 << (num_bits - 1); // shift back to 0.5-center return diff; } +static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { + return VP8RandomBits2(rg, num_bits, rg->amp_); +} + #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" #endif diff --git a/src/webp/decode.h b/src/webp/decode.h index 404dae18..ad5125ac 100644 --- a/src/webp/decode.h +++ b/src/webp/decode.h @@ -20,7 +20,7 @@ extern "C" { #endif -#define WEBP_DECODER_ABI_VERSION 0x0202 // MAJOR(8b) + MINOR(8b) +#define WEBP_DECODER_ABI_VERSION 0x0203 // MAJOR(8b) + MINOR(8b) // Note: forward declaring enumerations is not allowed in (strict) C and C++, // the types are left here for reference. 
@@ -441,11 +441,12 @@ struct WebPDecoderOptions { int use_scaling; // if true, scaling is applied _afterward_ int scaled_width, scaled_height; // final resolution int use_threads; // if true, use multi-threaded decoding + int dithering_strength; // dithering strength (0=Off, 100=full) // Unused for now: int force_rotation; // forced rotation (to be applied _last_) int no_enhancement; // if true, discard enhancement layer - uint32_t pad[6]; // padding for later use + uint32_t pad[5]; // padding for later use }; // Main object storing the configuration for advanced decoding.