add a -dither dithering option to the decoder
Even at high quality settings, the U/V quantizer step is limited to 4, which can lead to banding on gradients. This option selectively applies some randomness to potentially flattened-out U/V blocks in order to attenuate the banding. It is off by default in 'dwebp', but set to -dither 50 by default in 'vwebp'. Note: depending on the number of blocks selectively dithered, decoding appears to slow down by up to 10%. Change-Id: Icc2446007f33ddacb60b3a80a9e63f2d5ad162de
This commit is contained in:
		| @@ -555,6 +555,8 @@ static void Help(void) { | ||||
|          "  -version  .... print version number and exit.\n" | ||||
|          "  -nofancy ..... don't use the fancy YUV420 upscaler.\n" | ||||
|          "  -nofilter .... disable in-loop filtering.\n" | ||||
|          "  -nodither .... disable dithering.\n" | ||||
|          "  -dither <d> .. dithering strength (in 0..100)\n" | ||||
|          "  -mt .......... use multi-threading\n" | ||||
|          "  -crop <x> <y> <w> <h> ... crop output with the given rectangle\n" | ||||
|          "  -scale <w> <h> .......... scale the output (*after* any cropping)\n" | ||||
| @@ -625,6 +627,10 @@ int main(int argc, const char *argv[]) { | ||||
|       format = YUV; | ||||
|     } else if (!strcmp(argv[c], "-mt")) { | ||||
|       config.options.use_threads = 1; | ||||
|     } else if (!strcmp(argv[c], "-nodither")) { | ||||
|       config.options.dithering_strength = 0; | ||||
|     } else if (!strcmp(argv[c], "-dither") && c < argc - 1) { | ||||
|       config.options.dithering_strength = strtol(argv[++c], NULL, 0); | ||||
|     } else if (!strcmp(argv[c], "-crop") && c < argc - 4) { | ||||
|       config.options.use_cropping = 1; | ||||
|       config.options.crop_left   = strtol(argv[++c], NULL, 0); | ||||
| @@ -719,7 +725,7 @@ int main(int argc, const char *argv[]) { | ||||
|     if (!incremental) { | ||||
|       status = WebPDecode(data, data_size, &config); | ||||
|     } else { | ||||
|       WebPIDecoder* const idec = WebPINewDecoder(output_buffer); | ||||
|       WebPIDecoder* const idec = WebPIDecode(data, data_size, &config); | ||||
|       if (idec == NULL) { | ||||
|         fprintf(stderr, "Failed during WebPINewDecoder().\n"); | ||||
|         status = VP8_STATUS_OUT_OF_MEMORY; | ||||
|   | ||||
| @@ -376,6 +376,7 @@ static void Help(void) { | ||||
|          "  -noicc ....... don't use the icc profile if present.\n" | ||||
|          "  -nofancy ..... don't use the fancy YUV420 upscaler.\n" | ||||
|          "  -nofilter .... disable in-loop filtering.\n" | ||||
|          "  -dither <int>  dithering strength (0..100). Default=50.\n" | ||||
|          "  -mt .......... use multi-threading.\n" | ||||
|          "  -info ........ print info.\n" | ||||
|          "  -h     ....... this help message.\n" | ||||
| @@ -397,6 +398,7 @@ int main(int argc, char *argv[]) { | ||||
|     fprintf(stderr, "Library version mismatch!\n"); | ||||
|     return -1; | ||||
|   } | ||||
|   config->options.dithering_strength = 50; | ||||
|   kParams.use_color_profile = 1; | ||||
|  | ||||
|   for (c = 1; c < argc; ++c) { | ||||
| @@ -409,6 +411,8 @@ int main(int argc, char *argv[]) { | ||||
|       config->options.no_fancy_upsampling = 1; | ||||
|     } else if (!strcmp(argv[c], "-nofilter")) { | ||||
|       config->options.bypass_filtering = 1; | ||||
|     } else if (!strcmp(argv[c], "-dither") && c + 1 < argc) { | ||||
|       config->options.dithering_strength = strtol(argv[++c], NULL, 0); | ||||
|     } else if (!strcmp(argv[c], "-info")) { | ||||
|       kParams.print_info = 1; | ||||
|     } else if (!strcmp(argv[c], "-version")) { | ||||
|   | ||||
							
								
								
									
										12
									
								
								man/dwebp.1
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								man/dwebp.1
									
									
									
									
									
								
							| @@ -1,5 +1,5 @@ | ||||
| .\"                                      Hey, EMACS: -*- nroff -*- | ||||
| .TH DWEBP 1 "May 10, 2013" | ||||
| .TH DWEBP 1 "November 26, 2013" | ||||
| .SH NAME | ||||
| dwebp \- decompress a WebP file to an image file | ||||
| .SH SYNOPSIS | ||||
| @@ -55,7 +55,15 @@ edges (especially the red ones), but should be faster. | ||||
| .B \-nofilter | ||||
| Don't use the in-loop filtering process even if it is required by | ||||
| the bitstream. This may produce visible blocks on the non-compliant output, | ||||
| but will make the decoding faster. | ||||
| but it will make the decoding faster. | ||||
| .TP | ||||
| .B \-dither " strength | ||||
| Specify a dithering \fBstrength\fP between 0 and 100. Dithering is a | ||||
| post-processing effect applied to chroma components in lossy compression. | ||||
| It helps by smoothing gradients and avoiding banding artifacts. | ||||
| .TP | ||||
| .B \-nodither | ||||
| Disable all dithering (default). | ||||
| .TP | ||||
| .B \-mt | ||||
| Use multi-threading for decoding, if possible. | ||||
|   | ||||
| @@ -148,6 +148,82 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // Dithering | ||||
|  | ||||
| #define DITHER_AMP_TAB_SIZE 12 | ||||
| static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = { | ||||
|   // roughly, it's dqm->uv_mat_[1] | ||||
|   8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1 | ||||
| }; | ||||
|  | ||||
| void VP8InitDithering(const WebPDecoderOptions* const options, | ||||
|                       VP8Decoder* const dec) { | ||||
|   assert(dec != NULL); | ||||
|   if (options != NULL) { | ||||
|     const int d = options->dithering_strength; | ||||
|     const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1; | ||||
|     const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100); | ||||
|     if (f > 0) { | ||||
|       int s; | ||||
|       int all_amp = 0; | ||||
|       for (s = 0; s < NUM_MB_SEGMENTS; ++s) { | ||||
|         VP8QuantMatrix* const dqm = &dec->dqm_[s]; | ||||
|         if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) { | ||||
|           // TODO(skal): should we specially dither more for uv_quant_ < 0? | ||||
|           const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_; | ||||
|           dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3; | ||||
|         } | ||||
|         all_amp |= dqm->dither_; | ||||
|       } | ||||
|       if (all_amp != 0) { | ||||
|         VP8InitRandom(&dec->dithering_rg_, 1.0f); | ||||
|         dec->dither_ = 1; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // minimal amp that will provide a non-zero dithering effect | ||||
| #define MIN_DITHER_AMP 4 | ||||
| #define DITHER_DESCALE 4 | ||||
| #define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1)) | ||||
| #define DITHER_AMP_BITS 8 | ||||
| #define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS) | ||||
|  | ||||
| static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) { | ||||
|   int i, j; | ||||
|   for (j = 0; j < 8; ++j) { | ||||
|     for (i = 0; i < 8; ++i) { | ||||
|       // TODO: could be made faster with SSE2 | ||||
|       const int bits = | ||||
|           VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER; | ||||
|       // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100 | ||||
|       const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE; | ||||
|       const int v = (int)dst[i] + delta; | ||||
|       dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v; | ||||
|     } | ||||
|     dst += bps; | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void DitherRow(VP8Decoder* const dec) { | ||||
|   int mb_x; | ||||
|   assert(dec->dither_); | ||||
|   for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) { | ||||
|     const VP8ThreadContext* const ctx = &dec->thread_ctx_; | ||||
|     const VP8MBData* const data = ctx->mb_data_ + mb_x; | ||||
|     const int cache_id = ctx->id_; | ||||
|     const int uv_bps = dec->cache_uv_stride_; | ||||
|     if (data->dither_ >= MIN_DITHER_AMP) { | ||||
|       uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8; | ||||
|       uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8; | ||||
|       Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_); | ||||
|       Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // This function is called after a row of macroblocks is finished decoding. | ||||
| // It also takes into account the following restrictions: | ||||
| @@ -186,6 +262,10 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { | ||||
|     FilterRow(dec); | ||||
|   } | ||||
|  | ||||
|   if (dec->dither_) { | ||||
|     DitherRow(dec); | ||||
|   } | ||||
|  | ||||
|   if (io->put != NULL) { | ||||
|     int y_start = MACROBLOCK_VPOS(mb_y); | ||||
|     int y_end = MACROBLOCK_VPOS(mb_y + 1); | ||||
|   | ||||
| @@ -423,6 +423,7 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) { | ||||
|   // This change must be done before calling VP8InitFrame() | ||||
|   dec->mt_method_ = VP8GetThreadMethod(params->options, NULL, | ||||
|                                        io->width, io->height); | ||||
|   VP8InitDithering(params->options, dec); | ||||
|   if (!CopyParts0Data(idec)) { | ||||
|     return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY); | ||||
|   } | ||||
|   | ||||
| @@ -104,6 +104,8 @@ void VP8ParseQuant(VP8Decoder* const dec) { | ||||
|  | ||||
|       m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)]; | ||||
|       m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)]; | ||||
|  | ||||
|       m->uv_quant_ = q + dquv_ac;   // for dithering strength evaluation | ||||
|     } | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -561,6 +561,12 @@ static int ParseResiduals(VP8Decoder* const dec, | ||||
|  | ||||
|   block->non_zero_y_ = non_zero_y; | ||||
|   block->non_zero_uv_ = non_zero_uv; | ||||
|  | ||||
|   // We look at the mode-code of each block and check if some blocks have less | ||||
|   // than three non-zero coeffs (code < 2). This is to avoid dithering flat and | ||||
|   // empty blocks. | ||||
|   block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_; | ||||
|  | ||||
|   return !(non_zero_y | non_zero_uv);  // will be used for further optimization | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -17,6 +17,7 @@ | ||||
| #include <string.h>     // for memcpy() | ||||
| #include "./vp8li.h" | ||||
| #include "../utils/bit_reader.h" | ||||
| #include "../utils/random.h" | ||||
| #include "../utils/thread.h" | ||||
| #include "../dsp/dsp.h" | ||||
|  | ||||
| @@ -173,6 +174,9 @@ typedef struct {  // Top/Left Contexts used for syntax-parsing | ||||
| typedef int quant_t[2];      // [DC / AC].  Can be 'uint16_t[2]' too (~slower). | ||||
| typedef struct { | ||||
|   quant_t y1_mat_, y2_mat_, uv_mat_; | ||||
|  | ||||
|   int uv_quant_;   // U/V quantizer value | ||||
|   int dither_;     // dithering amplitude (0 = off, max=255) | ||||
| } VP8QuantMatrix; | ||||
|  | ||||
| // Data needed to reconstruct a macroblock | ||||
| @@ -190,6 +194,7 @@ typedef struct { | ||||
|   // This allows to call specialized transform functions. | ||||
|   uint32_t non_zero_y_; | ||||
|   uint32_t non_zero_uv_; | ||||
|   uint8_t dither_;      // local dithering strength (deduced from non_zero_*) | ||||
| } VP8MBData; | ||||
|  | ||||
| // Persistent information needed by the parallel processing | ||||
| @@ -244,6 +249,10 @@ struct VP8Decoder { | ||||
|   // per-partition boolean decoders. | ||||
|   VP8BitReader parts_[MAX_NUM_PARTITIONS]; | ||||
|  | ||||
|   // Dithering strength, deduced from decoding options | ||||
|   int dither_;                // whether to use dithering or not | ||||
|   VP8Random dithering_rg_;    // random generator for dithering | ||||
|  | ||||
|   // dequantization (one set of DC/AC dequant factor per segment) | ||||
|   VP8QuantMatrix dqm_[NUM_MB_SEGMENTS]; | ||||
|  | ||||
| @@ -324,7 +333,10 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); | ||||
| int VP8GetThreadMethod(const WebPDecoderOptions* const options, | ||||
|                        const WebPHeaderStructure* const headers, | ||||
|                        int width, int height); | ||||
| // Process the last decoded row (filtering + output) | ||||
| // Initialize dithering post-process if needed. | ||||
| void VP8InitDithering(const WebPDecoderOptions* const options, | ||||
|                       VP8Decoder* const dec); | ||||
| // Process the last decoded row (filtering + output). | ||||
| int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); | ||||
| // To be called at the start of a new scanline, to initialize predictors. | ||||
| void VP8InitScanline(VP8Decoder* const dec); | ||||
|   | ||||
| @@ -474,6 +474,7 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size, | ||||
|         // This change must be done before calling VP8Decode() | ||||
|         dec->mt_method_ = VP8GetThreadMethod(params->options, &headers, | ||||
|                                              io.width, io.height); | ||||
|         VP8InitDithering(params->options, dec); | ||||
|         if (!VP8Decode(dec, &io)) { | ||||
|           status = dec->status_; | ||||
|         } | ||||
|   | ||||
| @@ -34,8 +34,10 @@ typedef struct { | ||||
| void VP8InitRandom(VP8Random* const rg, float dithering); | ||||
|  | ||||
| // Returns a centered pseudo-random number with 'num_bits' amplitude. | ||||
| // (uses D.Knuth's Difference-based random generator) | ||||
| static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { | ||||
| // (uses D.Knuth's Difference-based random generator). | ||||
| // 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision. | ||||
| static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits, | ||||
|                                       int amp) { | ||||
|   int diff; | ||||
|   assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31); | ||||
|   diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_]; | ||||
| @@ -44,11 +46,15 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { | ||||
|   if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0; | ||||
|   if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0; | ||||
|   diff = (diff << 1) >> (32 - num_bits);         // sign-extend, 0-center | ||||
|   diff = (diff * rg->amp_) >> VP8_RANDOM_DITHER_FIX;   // restrict range | ||||
|   diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX;  // restrict range | ||||
|   diff += 1 << (num_bits - 1);                   // shift back to 0.5-center | ||||
|   return diff; | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) { | ||||
|   return VP8RandomBits2(rg, num_bits, rg->amp_); | ||||
| } | ||||
|  | ||||
| #if defined(__cplusplus) || defined(c_plusplus) | ||||
| }    // extern "C" | ||||
| #endif | ||||
|   | ||||
| @@ -20,7 +20,7 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
| #define WEBP_DECODER_ABI_VERSION 0x0202    // MAJOR(8b) + MINOR(8b) | ||||
| #define WEBP_DECODER_ABI_VERSION 0x0203    // MAJOR(8b) + MINOR(8b) | ||||
|  | ||||
| // Note: forward declaring enumerations is not allowed in (strict) C and C++, | ||||
| // the types are left here for reference. | ||||
| @@ -441,11 +441,12 @@ struct WebPDecoderOptions { | ||||
|   int use_scaling;                    // if true, scaling is applied _afterward_ | ||||
|   int scaled_width, scaled_height;    // final resolution | ||||
|   int use_threads;                    // if true, use multi-threaded decoding | ||||
|   int dithering_strength;             // dithering strength (0=Off, 100=full) | ||||
|  | ||||
|   // Unused for now: | ||||
|   int force_rotation;                 // forced rotation (to be applied _last_) | ||||
|   int no_enhancement;                 // if true, discard enhancement layer | ||||
|   uint32_t pad[6];                    // padding for later use | ||||
|   uint32_t pad[5];                    // padding for later use | ||||
| }; | ||||
|  | ||||
| // Main object storing the configuration for advanced decoding. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 skal
					skal