Speed up lossless encoder.
Speedup lossless encoder by 20-25% by optimizing: - GetBestColorTransformForTile: Use techniques like binary search and local minima search to reduce the search space. - VP8LFastSLog2Slow & VP8LFastLog2Slow: Adding the correction factor for log(1 + x) and increase the threshold for calling the approximate version of log_2 (compared to costly call to log()). Change-Id: Ia2444c914521ac298492aafa458e617028fc2f9d
This commit is contained in:
parent
77a8f91981
commit
c16cd99aba
@ -28,8 +28,6 @@
|
||||
#define MAX_DIFF_COST (1e30f)
|
||||
|
||||
// lookup table for small values of log2(int)
|
||||
#define APPROX_LOG_MAX 4096
|
||||
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
||||
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
||||
0.0000000000000000f, 0.0000000000000000f,
|
||||
1.0000000000000000f, 1.5849625007211560f,
|
||||
@ -331,16 +329,34 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
|
||||
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
|
||||
};
|
||||
|
||||
// The threshold till approximate version of log_2 can be used.
|
||||
// Practically, we can get rid of the call to log() as the two values match to
|
||||
// very high degree (the ratio of these two is 0.99999x).
|
||||
// Keeping a high threshold for now.
|
||||
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
||||
#define APPROX_LOG_MAX 4096
|
||||
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
||||
float VP8LFastSLog2Slow(int v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_MAX) {
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
int log_cnt = 0;
|
||||
int y = 1;
|
||||
int correction = 0;
|
||||
const float v_f = (float)v;
|
||||
while (v >= LOG_LOOKUP_IDX_MAX) {
|
||||
const int orig_v = v;
|
||||
do {
|
||||
++log_cnt;
|
||||
v = v >> 1;
|
||||
}
|
||||
return v_f * (kLog2Table[v] + log_cnt);
|
||||
y = y << 1;
|
||||
} while (v >= LOG_LOOKUP_IDX_MAX);
|
||||
// vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
|
||||
// Xf = floor(Xf) * (1 + (v % y) / v)
|
||||
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
||||
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
||||
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
||||
// LOG_2_RECIPROCAL ~ 23/16
|
||||
correction = (23 * (orig_v % y)) >> 4;
|
||||
return v_f * (kLog2Table[v] + log_cnt) + correction;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
|
||||
}
|
||||
@ -348,13 +364,24 @@ float VP8LFastSLog2Slow(int v) {
|
||||
|
||||
float VP8LFastLog2Slow(int v) {
|
||||
assert(v >= LOG_LOOKUP_IDX_MAX);
|
||||
if (v < APPROX_LOG_MAX) {
|
||||
if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
||||
int log_cnt = 0;
|
||||
while (v >= LOG_LOOKUP_IDX_MAX) {
|
||||
int y = 1;
|
||||
const int orig_v = v;
|
||||
double log_2;
|
||||
do {
|
||||
++log_cnt;
|
||||
v = v >> 1;
|
||||
y = y << 1;
|
||||
} while (v >= LOG_LOOKUP_IDX_MAX);
|
||||
log_2 = kLog2Table[v] + log_cnt;
|
||||
if (orig_v >= APPROX_LOG_MAX) {
|
||||
// Since the division is still expensive, add this correction factor only
|
||||
// for large values of 'v'.
|
||||
const int correction = (23 * (orig_v % y)) >> 4;
|
||||
log_2 += (double)correction / orig_v;
|
||||
}
|
||||
return kLog2Table[v] + log_cnt;
|
||||
return (float)log_2;
|
||||
} else {
|
||||
return (float)(LOG_2_RECIPROCAL * log((double)v));
|
||||
}
|
||||
@ -881,32 +908,13 @@ static float PredictionCostCrossColor(const int accumulated[256],
|
||||
PredictionCostSpatial(counts, 3, kExpValue);
|
||||
}
|
||||
|
||||
static Multipliers GetBestColorTransformForTile(
|
||||
int tile_x, int tile_y, int bits,
|
||||
Multipliers prevX,
|
||||
Multipliers prevY,
|
||||
int step, int xsize, int ysize,
|
||||
int* accumulated_red_histo,
|
||||
int* accumulated_blue_histo,
|
||||
const uint32_t* const argb) {
|
||||
float best_diff = MAX_DIFF_COST;
|
||||
float cur_diff;
|
||||
const int halfstep = step / 2;
|
||||
const int max_tile_size = 1 << bits;
|
||||
const int tile_y_offset = tile_y * max_tile_size;
|
||||
const int tile_x_offset = tile_x * max_tile_size;
|
||||
int green_to_red;
|
||||
int green_to_blue;
|
||||
int red_to_blue;
|
||||
const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
|
||||
const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
|
||||
Multipliers best_tx;
|
||||
MultipliersClear(&best_tx);
|
||||
|
||||
for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
|
||||
int histo[256] = { 0 };
|
||||
static float GetPredictionCostCrossColorRed(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y, int green_to_red,
|
||||
const int* const accumulated_red_histo, const uint32_t* const argb) {
|
||||
int all_y;
|
||||
|
||||
int histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
|
||||
int ix = all_y * xsize + tile_x_offset;
|
||||
int all_x;
|
||||
@ -918,25 +926,65 @@ static Multipliers GetBestColorTransformForTile(
|
||||
}
|
||||
}
|
||||
cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
|
||||
if ((uint8_t)green_to_red == prevX.green_to_red_) {
|
||||
if ((uint8_t)green_to_red == prev_x.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
}
|
||||
if ((uint8_t)green_to_red == prevY.green_to_red_) {
|
||||
if ((uint8_t)green_to_red == prev_y.green_to_red_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
}
|
||||
if (green_to_red == 0) {
|
||||
cur_diff -= 3;
|
||||
}
|
||||
if (cur_diff < best_diff) {
|
||||
best_diff = cur_diff;
|
||||
best_tx.green_to_red_ = green_to_red;
|
||||
return cur_diff;
|
||||
}
|
||||
|
||||
static void GetBestGreenToRed(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y,
|
||||
const int* const accumulated_red_histo, const uint32_t* const argb,
|
||||
Multipliers* best_tx) {
|
||||
int min_green_to_red = -64;
|
||||
int max_green_to_red = 64;
|
||||
int green_to_red = 0;
|
||||
int eval_min = 1;
|
||||
int eval_max = 1;
|
||||
float cur_diff_min = MAX_DIFF_COST;
|
||||
float cur_diff_max = MAX_DIFF_COST;
|
||||
// Do a binary search to find the optimal green_to_red color transform.
|
||||
while (max_green_to_red - min_green_to_red > 2) {
|
||||
if (eval_min) {
|
||||
cur_diff_min = GetPredictionCostCrossColorRed(
|
||||
tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
||||
prev_x, prev_y, min_green_to_red, &accumulated_red_histo[0], argb);
|
||||
eval_min = 0;
|
||||
}
|
||||
if (eval_max) {
|
||||
cur_diff_max = GetPredictionCostCrossColorRed(
|
||||
tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
||||
prev_x, prev_y, max_green_to_red, &accumulated_red_histo[0], argb);
|
||||
eval_max = 0;
|
||||
}
|
||||
if (cur_diff_min < cur_diff_max) {
|
||||
green_to_red = min_green_to_red;
|
||||
max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
|
||||
eval_max = 1;
|
||||
} else {
|
||||
green_to_red = max_green_to_red;
|
||||
min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
|
||||
eval_min = 1;
|
||||
}
|
||||
}
|
||||
best_diff = MAX_DIFF_COST;
|
||||
for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
|
||||
for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
|
||||
best_tx->green_to_red_ = green_to_red;
|
||||
}
|
||||
|
||||
static float GetPredictionCostCrossColorBlue(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y, int green_to_blue,
|
||||
int red_to_blue, const int* const accumulated_blue_histo,
|
||||
const uint32_t* const argb) {
|
||||
int all_y;
|
||||
int histo[256] = { 0 };
|
||||
float cur_diff;
|
||||
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
|
||||
int all_x;
|
||||
int ix = all_y * xsize + tile_x_offset;
|
||||
@ -947,18 +995,17 @@ static Multipliers GetBestColorTransformForTile(
|
||||
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
|
||||
}
|
||||
}
|
||||
cur_diff =
|
||||
PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
|
||||
if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
|
||||
cur_diff = PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
|
||||
if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
}
|
||||
if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
|
||||
if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
}
|
||||
if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
|
||||
if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
}
|
||||
if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
|
||||
if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
|
||||
cur_diff -= 3; // favor keeping the areas locally similar
|
||||
}
|
||||
if (green_to_blue == 0) {
|
||||
@ -967,13 +1014,74 @@ static Multipliers GetBestColorTransformForTile(
|
||||
if (red_to_blue == 0) {
|
||||
cur_diff -= 3;
|
||||
}
|
||||
return cur_diff;
|
||||
}
|
||||
|
||||
static void GetBestGreenRedToBlue(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y, int quality,
|
||||
const int* const accumulated_blue_histo, const uint32_t* const argb,
|
||||
Multipliers* best_tx) {
|
||||
float best_diff = MAX_DIFF_COST;
|
||||
float cur_diff;
|
||||
const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
|
||||
const int min_green_to_blue = -32;
|
||||
const int max_green_to_blue = 32;
|
||||
const int min_red_to_blue = -16;
|
||||
const int max_red_to_blue = 16;
|
||||
const int num_iters =
|
||||
(1 + (max_green_to_blue - min_green_to_blue) / step) *
|
||||
(1 + (max_red_to_blue - min_red_to_blue) / step);
|
||||
// Number of tries to get optimal green_to_blue & red_to_blue color transforms
|
||||
// after finding a local minima.
|
||||
const int max_tries_after_min = 4 + (num_iters >> 2);
|
||||
int num_tries_after_min = 0;
|
||||
int green_to_blue;
|
||||
for (green_to_blue = min_green_to_blue;
|
||||
green_to_blue <= max_green_to_blue &&
|
||||
num_tries_after_min < max_tries_after_min;
|
||||
green_to_blue += step) {
|
||||
int red_to_blue;
|
||||
for (red_to_blue = min_red_to_blue;
|
||||
red_to_blue <= max_red_to_blue &&
|
||||
num_tries_after_min < max_tries_after_min;
|
||||
red_to_blue += step) {
|
||||
cur_diff = GetPredictionCostCrossColorBlue(
|
||||
tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
|
||||
prev_y, green_to_blue, red_to_blue, &accumulated_blue_histo[0], argb);
|
||||
if (cur_diff < best_diff) {
|
||||
best_diff = cur_diff;
|
||||
best_tx.green_to_blue_ = green_to_blue;
|
||||
best_tx.red_to_blue_ = red_to_blue;
|
||||
best_tx->green_to_blue_ = green_to_blue;
|
||||
best_tx->red_to_blue_ = red_to_blue;
|
||||
num_tries_after_min = 0;
|
||||
} else {
|
||||
++num_tries_after_min;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Multipliers GetBestColorTransformForTile(
|
||||
int tile_x, int tile_y, int bits,
|
||||
Multipliers prev_x,
|
||||
Multipliers prev_y,
|
||||
int quality, int xsize, int ysize,
|
||||
const int* const accumulated_red_histo,
|
||||
const int* const accumulated_blue_histo,
|
||||
const uint32_t* const argb) {
|
||||
const int max_tile_size = 1 << bits;
|
||||
const int tile_y_offset = tile_y * max_tile_size;
|
||||
const int tile_x_offset = tile_x * max_tile_size;
|
||||
const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
|
||||
const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
|
||||
Multipliers best_tx;
|
||||
MultipliersClear(&best_tx);
|
||||
|
||||
GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
||||
prev_x, prev_y, &accumulated_red_histo[0], argb, &best_tx);
|
||||
GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
|
||||
xsize, prev_x, prev_y, quality,
|
||||
&accumulated_blue_histo[0], argb, &best_tx);
|
||||
return best_tx;
|
||||
}
|
||||
|
||||
@ -994,7 +1102,7 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LColorSpaceTransform(int width, int height, int bits, int step,
|
||||
void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image) {
|
||||
const int max_tile_size = 1 << bits;
|
||||
const int tile_xsize = VP8LSubSampleSize(width, bits);
|
||||
@ -1018,14 +1126,13 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int step,
|
||||
}
|
||||
prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
|
||||
prev_x, prev_y,
|
||||
step, width, height,
|
||||
quality, width, height,
|
||||
&accumulated_red_histo[0],
|
||||
&accumulated_blue_histo[0],
|
||||
argb);
|
||||
image[offset] = MultipliersToColorCode(&prev_x);
|
||||
CopyTileWithColorTransform(width, height,
|
||||
tile_x_offset, tile_y_offset, max_tile_size,
|
||||
prev_x, argb);
|
||||
CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
|
||||
max_tile_size, prev_x, argb);
|
||||
|
||||
// Gather accumulated histogram data.
|
||||
for (y = tile_y_offset; y < all_y_max; ++y) {
|
||||
|
@ -66,7 +66,7 @@ void VP8LResidualImage(int width, int height, int bits,
|
||||
uint32_t* const argb, uint32_t* const argb_scratch,
|
||||
uint32_t* const image);
|
||||
|
||||
void VP8LColorSpaceTransform(int width, int height, int bits, int step,
|
||||
void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
uint32_t* const argb, uint32_t* image);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -695,9 +695,8 @@ static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
|
||||
const int ccolor_transform_bits = enc->transform_bits_;
|
||||
const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
|
||||
const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
|
||||
const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
|
||||
|
||||
VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step,
|
||||
VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
|
||||
enc->argb_, enc->transform_data_);
|
||||
VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
|
||||
VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);
|
||||
|
Loading…
Reference in New Issue
Block a user