add a -dither dithering option to the decoder
Even at high quality setting, the U/V quantizer step is limited to 4 which can lead to banding on gradient. This option allows to selectively apply some randomness to potentially flattened-out U/V blocks and attenuate the banding. This option is off by default in 'dwebp', but set to -dither 50 by default in 'vwebp'. Note: depending on the number of blocks selectively dithered, we can have up to a 10% slow-down in decoding speed it seems. Change-Id: Icc2446007f33ddacb60b3a80a9e63f2d5ad162de
This commit is contained in:
parent
e812401299
commit
cbdd3e6e53
@ -555,6 +555,8 @@ static void Help(void) {
|
||||
" -version .... print version number and exit.\n"
|
||||
" -nofancy ..... don't use the fancy YUV420 upscaler.\n"
|
||||
" -nofilter .... disable in-loop filtering.\n"
|
||||
" -nodither .... disable dithering.\n"
|
||||
" -dither <d> .. dithering strength (in 0..100)\n"
|
||||
" -mt .......... use multi-threading\n"
|
||||
" -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
|
||||
" -scale <w> <h> .......... scale the output (*after* any cropping)\n"
|
||||
@ -625,6 +627,10 @@ int main(int argc, const char *argv[]) {
|
||||
format = YUV;
|
||||
} else if (!strcmp(argv[c], "-mt")) {
|
||||
config.options.use_threads = 1;
|
||||
} else if (!strcmp(argv[c], "-nodither")) {
|
||||
config.options.dithering_strength = 0;
|
||||
} else if (!strcmp(argv[c], "-dither") && c < argc - 1) {
|
||||
config.options.dithering_strength = strtol(argv[++c], NULL, 0);
|
||||
} else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
|
||||
config.options.use_cropping = 1;
|
||||
config.options.crop_left = strtol(argv[++c], NULL, 0);
|
||||
@ -719,7 +725,7 @@ int main(int argc, const char *argv[]) {
|
||||
if (!incremental) {
|
||||
status = WebPDecode(data, data_size, &config);
|
||||
} else {
|
||||
WebPIDecoder* const idec = WebPINewDecoder(output_buffer);
|
||||
WebPIDecoder* const idec = WebPIDecode(data, data_size, &config);
|
||||
if (idec == NULL) {
|
||||
fprintf(stderr, "Failed during WebPINewDecoder().\n");
|
||||
status = VP8_STATUS_OUT_OF_MEMORY;
|
||||
|
@ -376,6 +376,7 @@ static void Help(void) {
|
||||
" -noicc ....... don't use the icc profile if present.\n"
|
||||
" -nofancy ..... don't use the fancy YUV420 upscaler.\n"
|
||||
" -nofilter .... disable in-loop filtering.\n"
|
||||
" -dither <int> dithering strength (0..100). Default=50.\n"
|
||||
" -mt .......... use multi-threading.\n"
|
||||
" -info ........ print info.\n"
|
||||
" -h ....... this help message.\n"
|
||||
@ -397,6 +398,7 @@ int main(int argc, char *argv[]) {
|
||||
fprintf(stderr, "Library version mismatch!\n");
|
||||
return -1;
|
||||
}
|
||||
config->options.dithering_strength = 50;
|
||||
kParams.use_color_profile = 1;
|
||||
|
||||
for (c = 1; c < argc; ++c) {
|
||||
@ -409,6 +411,8 @@ int main(int argc, char *argv[]) {
|
||||
config->options.no_fancy_upsampling = 1;
|
||||
} else if (!strcmp(argv[c], "-nofilter")) {
|
||||
config->options.bypass_filtering = 1;
|
||||
} else if (!strcmp(argv[c], "-dither") && c + 1 < argc) {
|
||||
config->options.dithering_strength = strtol(argv[++c], NULL, 0);
|
||||
} else if (!strcmp(argv[c], "-info")) {
|
||||
kParams.print_info = 1;
|
||||
} else if (!strcmp(argv[c], "-version")) {
|
||||
|
12
man/dwebp.1
12
man/dwebp.1
@ -1,5 +1,5 @@
|
||||
.\" Hey, EMACS: -*- nroff -*-
|
||||
.TH DWEBP 1 "May 10, 2013"
|
||||
.TH DWEBP 1 "November 26, 2013"
|
||||
.SH NAME
|
||||
dwebp \- decompress a WebP file to an image file
|
||||
.SH SYNOPSIS
|
||||
@ -55,7 +55,15 @@ edges (especially the red ones), but should be faster.
|
||||
.B \-nofilter
|
||||
Don't use the in-loop filtering process even if it is required by
|
||||
the bitstream. This may produce visible blocks on the non-compliant output,
|
||||
but will make the decoding faster.
|
||||
but it will make the decoding faster.
|
||||
.TP
|
||||
.B \-dither " strength
|
||||
Specify a dithering \fBstrength\fP between 0 and 100. Dithering is a
|
||||
post-processing effect applied to chroma components in lossy compression.
|
||||
It helps by smoothing gradients and avoiding banding artifacts.
|
||||
.TP
|
||||
.B \-nodither
|
||||
Disable all dithering (default).
|
||||
.TP
|
||||
.B \-mt
|
||||
Use multi-threading for decoding, if possible.
|
||||
|
@ -148,6 +148,82 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Dithering
|
||||
|
||||
#define DITHER_AMP_TAB_SIZE 12
|
||||
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
|
||||
// roughly, it's dqm->uv_mat_[1]
|
||||
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
|
||||
};
|
||||
|
||||
void VP8InitDithering(const WebPDecoderOptions* const options,
|
||||
VP8Decoder* const dec) {
|
||||
assert(dec != NULL);
|
||||
if (options != NULL) {
|
||||
const int d = options->dithering_strength;
|
||||
const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
|
||||
const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
|
||||
if (f > 0) {
|
||||
int s;
|
||||
int all_amp = 0;
|
||||
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
||||
VP8QuantMatrix* const dqm = &dec->dqm_[s];
|
||||
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
|
||||
// TODO(skal): should we specially dither more for uv_quant_ < 0?
|
||||
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
|
||||
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
|
||||
}
|
||||
all_amp |= dqm->dither_;
|
||||
}
|
||||
if (all_amp != 0) {
|
||||
VP8InitRandom(&dec->dithering_rg_, 1.0f);
|
||||
dec->dither_ = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// minimal amp that will provide a non-zero dithering effect
|
||||
#define MIN_DITHER_AMP 4
|
||||
#define DITHER_DESCALE 4
|
||||
#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
|
||||
#define DITHER_AMP_BITS 8
|
||||
#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
|
||||
|
||||
static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
// TODO: could be made faster with SSE2
|
||||
const int bits =
|
||||
VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
|
||||
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
|
||||
const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
|
||||
const int v = (int)dst[i] + delta;
|
||||
dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
|
||||
}
|
||||
dst += bps;
|
||||
}
|
||||
}
|
||||
|
||||
static void DitherRow(VP8Decoder* const dec) {
|
||||
int mb_x;
|
||||
assert(dec->dither_);
|
||||
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
|
||||
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
|
||||
const VP8MBData* const data = ctx->mb_data_ + mb_x;
|
||||
const int cache_id = ctx->id_;
|
||||
const int uv_bps = dec->cache_uv_stride_;
|
||||
if (data->dither_ >= MIN_DITHER_AMP) {
|
||||
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
|
||||
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
|
||||
Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
|
||||
Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// This function is called after a row of macroblocks is finished decoding.
|
||||
// It also takes into account the following restrictions:
|
||||
@ -186,6 +262,10 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
|
||||
FilterRow(dec);
|
||||
}
|
||||
|
||||
if (dec->dither_) {
|
||||
DitherRow(dec);
|
||||
}
|
||||
|
||||
if (io->put != NULL) {
|
||||
int y_start = MACROBLOCK_VPOS(mb_y);
|
||||
int y_end = MACROBLOCK_VPOS(mb_y + 1);
|
||||
|
@ -423,6 +423,7 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
|
||||
// This change must be done before calling VP8InitFrame()
|
||||
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
|
||||
io->width, io->height);
|
||||
VP8InitDithering(params->options, dec);
|
||||
if (!CopyParts0Data(idec)) {
|
||||
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
|
||||
}
|
||||
|
@ -104,6 +104,8 @@ void VP8ParseQuant(VP8Decoder* const dec) {
|
||||
|
||||
m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
|
||||
m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
|
||||
|
||||
m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -561,6 +561,12 @@ static int ParseResiduals(VP8Decoder* const dec,
|
||||
|
||||
block->non_zero_y_ = non_zero_y;
|
||||
block->non_zero_uv_ = non_zero_uv;
|
||||
|
||||
// We look at the mode-code of each block and check if some blocks have less
|
||||
// than three non-zero coeffs (code < 2). This is to avoid dithering flat and
|
||||
// empty blocks.
|
||||
block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
|
||||
|
||||
return !(non_zero_y | non_zero_uv); // will be used for further optimization
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <string.h> // for memcpy()
|
||||
#include "./vp8li.h"
|
||||
#include "../utils/bit_reader.h"
|
||||
#include "../utils/random.h"
|
||||
#include "../utils/thread.h"
|
||||
#include "../dsp/dsp.h"
|
||||
|
||||
@ -173,6 +174,9 @@ typedef struct { // Top/Left Contexts used for syntax-parsing
|
||||
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
|
||||
typedef struct {
|
||||
quant_t y1_mat_, y2_mat_, uv_mat_;
|
||||
|
||||
int uv_quant_; // U/V quantizer value
|
||||
int dither_; // dithering amplitude (0 = off, max=255)
|
||||
} VP8QuantMatrix;
|
||||
|
||||
// Data needed to reconstruct a macroblock
|
||||
@ -190,6 +194,7 @@ typedef struct {
|
||||
// This allows to call specialized transform functions.
|
||||
uint32_t non_zero_y_;
|
||||
uint32_t non_zero_uv_;
|
||||
uint8_t dither_; // local dithering strength (deduced from non_zero_*)
|
||||
} VP8MBData;
|
||||
|
||||
// Persistent information needed by the parallel processing
|
||||
@ -244,6 +249,10 @@ struct VP8Decoder {
|
||||
// per-partition boolean decoders.
|
||||
VP8BitReader parts_[MAX_NUM_PARTITIONS];
|
||||
|
||||
// Dithering strength, deduced from decoding options
|
||||
int dither_; // whether to use dithering or not
|
||||
VP8Random dithering_rg_; // random generator for dithering
|
||||
|
||||
// dequantization (one set of DC/AC dequant factor per segment)
|
||||
VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
|
||||
|
||||
@ -324,7 +333,10 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
|
||||
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
|
||||
const WebPHeaderStructure* const headers,
|
||||
int width, int height);
|
||||
// Process the last decoded row (filtering + output)
|
||||
// Initialize dithering post-process if needed.
|
||||
void VP8InitDithering(const WebPDecoderOptions* const options,
|
||||
VP8Decoder* const dec);
|
||||
// Process the last decoded row (filtering + output).
|
||||
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
|
||||
// To be called at the start of a new scanline, to initialize predictors.
|
||||
void VP8InitScanline(VP8Decoder* const dec);
|
||||
|
@ -474,6 +474,7 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
|
||||
// This change must be done before calling VP8Decode()
|
||||
dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
|
||||
io.width, io.height);
|
||||
VP8InitDithering(params->options, dec);
|
||||
if (!VP8Decode(dec, &io)) {
|
||||
status = dec->status_;
|
||||
}
|
||||
|
@ -34,8 +34,10 @@ typedef struct {
|
||||
void VP8InitRandom(VP8Random* const rg, float dithering);
|
||||
|
||||
// Returns a centered pseudo-random number with 'num_bits' amplitude.
|
||||
// (uses D.Knuth's Difference-based random generator)
|
||||
static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
|
||||
// (uses D.Knuth's Difference-based random generator).
|
||||
// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision.
|
||||
static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits,
|
||||
int amp) {
|
||||
int diff;
|
||||
assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31);
|
||||
diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_];
|
||||
@ -44,11 +46,15 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
|
||||
if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0;
|
||||
if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0;
|
||||
diff = (diff << 1) >> (32 - num_bits); // sign-extend, 0-center
|
||||
diff = (diff * rg->amp_) >> VP8_RANDOM_DITHER_FIX; // restrict range
|
||||
diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range
|
||||
diff += 1 << (num_bits - 1); // shift back to 0.5-center
|
||||
return diff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
|
||||
return VP8RandomBits2(rg, num_bits, rg->amp_);
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -20,7 +20,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define WEBP_DECODER_ABI_VERSION 0x0202 // MAJOR(8b) + MINOR(8b)
|
||||
#define WEBP_DECODER_ABI_VERSION 0x0203 // MAJOR(8b) + MINOR(8b)
|
||||
|
||||
// Note: forward declaring enumerations is not allowed in (strict) C and C++,
|
||||
// the types are left here for reference.
|
||||
@ -441,11 +441,12 @@ struct WebPDecoderOptions {
|
||||
int use_scaling; // if true, scaling is applied _afterward_
|
||||
int scaled_width, scaled_height; // final resolution
|
||||
int use_threads; // if true, use multi-threaded decoding
|
||||
int dithering_strength; // dithering strength (0=Off, 100=full)
|
||||
|
||||
// Unused for now:
|
||||
int force_rotation; // forced rotation (to be applied _last_)
|
||||
int no_enhancement; // if true, discard enhancement layer
|
||||
uint32_t pad[6]; // padding for later use
|
||||
uint32_t pad[5]; // padding for later use
|
||||
};
|
||||
|
||||
// Main object storing the configuration for advanced decoding.
|
||||
|
Loading…
x
Reference in New Issue
Block a user