Properly compute the optimal color cache size.
The previous optimization performed a dichotomic (binary) search on a function that can be anything in practice, hence a bit of randomness in the result. Also, two magic constants were used: one for an extra constant cost, one for an extra linear cost. Both values/models were empirical. A brute-force search for the best cache size is now performed. To reduce the CPU impact, a speed optimization is also made: a value is no longer inserted into the color caches again and again. This makes sense in itself, and repeated values are also the most common case in which LZ77 is useful, hence an overall improvement in some cases. Change-Id: I57de5750ad2313b2feecbcd15cd6e4feeb98e5c8
This commit is contained in:
parent
8874b16275
commit
527844fee0
@ -1526,64 +1526,88 @@ static void BackwardReferences2DLocality(int xsize,
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the entropy for the given cache bits.
|
||||
// Computes the entropies for a color cache size (in bits) between 0 (unused)
|
||||
// and cache_bits_max (inclusive).
|
||||
// Returns 1 on success, 0 in case of allocation error.
|
||||
static int ComputeCacheEntropy(const uint32_t* argb,
|
||||
static int ComputeCacheEntropies(const uint32_t* argb,
|
||||
const VP8LBackwardRefs* const refs,
|
||||
int cache_bits, double* entropy) {
|
||||
const int use_color_cache = (cache_bits > 0);
|
||||
int cc_init = 0;
|
||||
const double kSmallPenaltyForLargeCache = 4.0;
|
||||
VP8LColorCache hashers;
|
||||
int cache_bits_max, double entropies[]) {
|
||||
int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
|
||||
VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
|
||||
VP8LRefsCursor c = VP8LRefsCursorInit(refs);
|
||||
VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
|
||||
VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL };
|
||||
int ok = 0;
|
||||
if (histo == NULL) goto Error;
|
||||
int i;
|
||||
|
||||
if (use_color_cache) {
|
||||
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
|
||||
if (!cc_init) goto Error;
|
||||
for (i = 0; i <= cache_bits_max; ++i) {
|
||||
histos[i] = VP8LAllocateHistogram(i);
|
||||
if (histos[i] == NULL) goto Error;
|
||||
if (i == 0) continue;
|
||||
cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
|
||||
if (!cc_init[i]) goto Error;
|
||||
}
|
||||
if (!use_color_cache) {
|
||||
|
||||
assert(cache_bits_max >= 0);
|
||||
// Do not use the color cache for cache_bits=0.
|
||||
while (VP8LRefsCursorOk(&c)) {
|
||||
VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
|
||||
VP8LHistogramAddSinglePixOrCopy(histos[0], c.cur_pos);
|
||||
VP8LRefsCursorNext(&c);
|
||||
}
|
||||
} else {
|
||||
if (cache_bits_max > 0) {
|
||||
c = VP8LRefsCursorInit(refs);
|
||||
while (VP8LRefsCursorOk(&c)) {
|
||||
const PixOrCopy* const v = c.cur_pos;
|
||||
if (PixOrCopyIsLiteral(v)) {
|
||||
const uint32_t pix = *argb++;
|
||||
const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix);
|
||||
if (VP8LColorCacheLookup(&hashers, key) == pix) {
|
||||
++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
|
||||
// The keys of the caches can be derived from the longest one.
|
||||
int key = HashPix(pix, 32 - cache_bits_max);
|
||||
for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
|
||||
if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
|
||||
++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
|
||||
} else {
|
||||
VP8LColorCacheSet(&hashers, key, pix);
|
||||
++histo->blue_[pix & 0xff];
|
||||
++histo->literal_[(pix >> 8) & 0xff];
|
||||
++histo->red_[(pix >> 16) & 0xff];
|
||||
++histo->alpha_[pix >> 24];
|
||||
VP8LColorCacheSet(&hashers[i], key, pix);
|
||||
++histos[i]->blue_[pix & 0xff];
|
||||
++histos[i]->literal_[(pix >> 8) & 0xff];
|
||||
++histos[i]->red_[(pix >> 16) & 0xff];
|
||||
++histos[i]->alpha_[pix >> 24];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Update the histograms for distance/length.
|
||||
int len = PixOrCopyLength(v);
|
||||
int code, extra_bits;
|
||||
VP8LPrefixEncodeBits(len, &code, &extra_bits);
|
||||
++histo->literal_[NUM_LITERAL_CODES + code];
|
||||
VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
|
||||
++histo->distance_[code];
|
||||
int code_dist, code_len, extra_bits;
|
||||
uint32_t argb_prev = *argb - 1;
|
||||
VP8LPrefixEncodeBits(len, &code_len, &extra_bits);
|
||||
VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code_dist, &extra_bits);
|
||||
for (i = 1; i <= cache_bits_max; ++i) {
|
||||
++histos[i]->literal_[NUM_LITERAL_CODES + code_len];
|
||||
++histos[i]->distance_[code_dist];
|
||||
}
|
||||
// Update the colors caches.
|
||||
do {
|
||||
VP8LColorCacheInsert(&hashers, *argb++);
|
||||
if (*argb != argb_prev) {
|
||||
// Efficiency: insert only if the color changes.
|
||||
int key = HashPix(*argb, 32 - cache_bits_max);
|
||||
for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
|
||||
hashers[i].colors_[key] = *argb;
|
||||
}
|
||||
argb_prev = *argb;
|
||||
}
|
||||
argb++;
|
||||
} while (--len != 0);
|
||||
}
|
||||
VP8LRefsCursorNext(&c);
|
||||
}
|
||||
}
|
||||
*entropy = VP8LHistogramEstimateBits(histo) +
|
||||
kSmallPenaltyForLargeCache * cache_bits;
|
||||
for (i = 0; i <= cache_bits_max; ++i) {
|
||||
entropies[i] = VP8LHistogramEstimateBits(histos[i]);
|
||||
}
|
||||
ok = 1;
|
||||
Error:
|
||||
if (cc_init) VP8LColorCacheClear(&hashers);
|
||||
VP8LFreeHistogram(histo);
|
||||
for (i = 0; i <= cache_bits_max; ++i) {
|
||||
if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
|
||||
VP8LFreeHistogram(histos[i]);
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
@ -1598,13 +1622,10 @@ static int CalculateBestCacheSize(const uint32_t* const argb,
|
||||
VP8LBackwardRefs* const refs,
|
||||
int* const lz77_computed,
|
||||
int* const best_cache_bits) {
|
||||
int eval_low = 1;
|
||||
int eval_high = 1;
|
||||
double entropy_low = MAX_ENTROPY;
|
||||
double entropy_high = MAX_ENTROPY;
|
||||
const double cost_mul = 5e-4;
|
||||
int cache_bits_low = 0;
|
||||
int i;
|
||||
int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits;
|
||||
double entropy_min = MAX_ENTROPY;
|
||||
double entropies[MAX_COLOR_CACHE_BITS + 1];
|
||||
|
||||
assert(cache_bits_high <= MAX_COLOR_CACHE_BITS);
|
||||
|
||||
@ -1614,36 +1635,21 @@ static int CalculateBestCacheSize(const uint32_t* const argb,
|
||||
// Local color cache is disabled.
|
||||
return 1;
|
||||
}
|
||||
if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, hash_chain,
|
||||
refs)) {
|
||||
// Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color cache
|
||||
// is not that different in practice.
|
||||
if (!BackwardReferencesLz77(xsize, ysize, argb, 0, hash_chain, refs)) {
|
||||
return 0;
|
||||
}
|
||||
*lz77_computed = 1;
|
||||
// Do a binary search to find the optimal entropy for cache_bits.
|
||||
while (eval_low || eval_high) {
|
||||
if (eval_low) {
|
||||
if (!ComputeCacheEntropy(argb, refs, cache_bits_low, &entropy_low)) {
|
||||
// Find the cache_bits giving the lowest entropy. The search is done in a
|
||||
// brute-force way as the function (entropy w.r.t cache_bits) can be
|
||||
// anything in practice.
|
||||
if (!ComputeCacheEntropies(argb, refs, cache_bits_high, entropies)) {
|
||||
return 0;
|
||||
}
|
||||
entropy_low += entropy_low * cache_bits_low * cost_mul;
|
||||
eval_low = 0;
|
||||
}
|
||||
if (eval_high) {
|
||||
if (!ComputeCacheEntropy(argb, refs, cache_bits_high, &entropy_high )) {
|
||||
return 0;
|
||||
}
|
||||
entropy_high += entropy_high * cache_bits_high * cost_mul;
|
||||
eval_high = 0;
|
||||
}
|
||||
if (entropy_high < entropy_low) {
|
||||
const int prev_cache_bits_low = cache_bits_low;
|
||||
*best_cache_bits = cache_bits_high;
|
||||
cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
|
||||
if (cache_bits_low != prev_cache_bits_low) eval_low = 1;
|
||||
} else {
|
||||
*best_cache_bits = cache_bits_low;
|
||||
cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
|
||||
if (cache_bits_high != cache_bits_low) eval_high = 1;
|
||||
for (i = 0; i <= cache_bits_high; ++i) {
|
||||
if (i == 0 || entropies[i] < entropy_min) {
|
||||
entropy_min = entropies[i];
|
||||
*best_cache_bits = i;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
|
Loading…
Reference in New Issue
Block a user