Merge "introduce a generic GetCoeffs() function pointer"
This commit is contained in:
commit
1dc82a6bba
@ -26,6 +26,16 @@ int WebPGetDecoderVersion(void) {
|
||||
return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Signature and pointer-to-function for GetCoeffs() variants below.
|
||||
|
||||
typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
|
||||
const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out);
|
||||
static volatile GetCoeffsFunc GetCoeffs = NULL;
|
||||
|
||||
static void InitGetCoeffs(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VP8Decoder
|
||||
|
||||
@ -51,6 +61,7 @@ VP8Decoder* VP8New(void) {
|
||||
WebPGetWorkerInterface()->Init(&dec->worker_);
|
||||
dec->ready_ = 0;
|
||||
dec->num_parts_minus_one_ = 0;
|
||||
InitGetCoeffs();
|
||||
}
|
||||
return dec;
|
||||
}
|
||||
@ -422,8 +433,9 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
|
||||
}
|
||||
|
||||
// Returns the position of the last non-zero coeff plus one
|
||||
static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out) {
|
||||
static int GetCoeffsFast(VP8BitReader* const br,
|
||||
const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out) {
|
||||
const uint8_t* p = prob[n]->probas_[ctx];
|
||||
for (; n < 16; ++n) {
|
||||
if (!VP8GetBit(br, p[0])) {
|
||||
@ -449,6 +461,46 @@ static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
|
||||
return 16;
|
||||
}
|
||||
|
||||
// This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version
|
||||
// of VP8GetBitAlt() targeting specific platforms.
|
||||
static int GetCoeffsAlt(VP8BitReader* const br,
|
||||
const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out) {
|
||||
const uint8_t* p = prob[n]->probas_[ctx];
|
||||
for (; n < 16; ++n) {
|
||||
if (!VP8GetBitAlt(br, p[0])) {
|
||||
return n; // previous coeff was last non-zero coeff
|
||||
}
|
||||
while (!VP8GetBitAlt(br, p[1])) { // sequence of zero coeffs
|
||||
p = prob[++n]->probas_[0];
|
||||
if (n == 16) return 16;
|
||||
}
|
||||
{ // non zero coeff
|
||||
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
|
||||
int v;
|
||||
if (!VP8GetBitAlt(br, p[2])) {
|
||||
v = 1;
|
||||
p = p_ctx[1];
|
||||
} else {
|
||||
v = GetLargeValue(br, p);
|
||||
p = p_ctx[2];
|
||||
}
|
||||
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
|
||||
}
|
||||
}
|
||||
return 16;
|
||||
}
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION static void InitGetCoeffs(void) {
|
||||
if (GetCoeffs == NULL) {
|
||||
if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
|
||||
GetCoeffs = GetCoeffsAlt;
|
||||
} else {
|
||||
GetCoeffs = GetCoeffsFast;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
|
||||
nz_coeffs <<= 2;
|
||||
nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "../dsp/dsp.h"
|
||||
#include "./bit_reader.h"
|
||||
#include "./endian_inl.h"
|
||||
#include "./utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -163,6 +164,37 @@ int VP8GetSigned(VP8BitReader* const br, int v) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
|
||||
// Don't move this declaration! It makes a big speed difference to store
|
||||
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
|
||||
// alter br->range_ value.
|
||||
range_t range = br->range_;
|
||||
if (br->bits_ < 0) {
|
||||
VP8LoadNewBytes(br);
|
||||
}
|
||||
{
|
||||
const int pos = br->bits_;
|
||||
const range_t split = (range * prob) >> 8;
|
||||
const range_t value = (range_t)(br->value_ >> pos);
|
||||
int bit; // Don't use 'const int bit = (value > split);", it's slower.
|
||||
if (value > split) {
|
||||
range -= split + 1;
|
||||
br->value_ -= (bit_t)(split + 1) << pos;
|
||||
bit = 1;
|
||||
} else {
|
||||
range = split;
|
||||
bit = 0;
|
||||
}
|
||||
if (range <= (range_t)0x7e) {
|
||||
const int shift = kVP8Log2Range[range];
|
||||
range = kVP8NewRange[range];
|
||||
br->bits_ -= shift;
|
||||
}
|
||||
br->range_ = range;
|
||||
return bit;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user