Remove ReadOneBit() and ReadSymbolUnsafe()
Simplify and re-organize the VP8L bit-reader functions (e.g.: the 40-bit look-ahead code was helping much) Speed-up with LBITS=64, on arm7-a: => before: ./dwebp_justify_24_neon -v bryce_ll.webp Time to decode picture: 11.393s File bryce_ll.webp can be decoded (dimensions: 11158 x 2156). ... => after (LBITS=64): Time to decode picture: 9.953s making the VP8L bit-reader in 32 bit mode is going to be harder (because we need to be able to read two symbols at a time, each with max length 15 bits) Change-Id: I89746fb103b87b5e2fd40a3208a6fbc584b88297
This commit is contained in:
parent
b7490f8553
commit
1667bded67
@ -149,29 +149,21 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Decodes the next Huffman code from bit-stream.
|
||||
// FillBitWindow(br) needs to be called at minimum every second call
|
||||
// to ReadSymbolUnsafe.
|
||||
static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
|
||||
const HuffmanTreeNode* node = tree->root_;
|
||||
assert(node != NULL);
|
||||
while (!HuffmanTreeNodeIsLeaf(node)) {
|
||||
node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
|
||||
}
|
||||
return node->symbol_;
|
||||
}
|
||||
|
||||
// to ReadSymbol, in order to pre-fetch enough bits.
|
||||
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
|
||||
VP8LBitReader* const br) {
|
||||
const int read_safe = (br->pos_ + 8 > br->len_);
|
||||
if (!read_safe) {
|
||||
return ReadSymbolUnsafe(tree, br);
|
||||
} else {
|
||||
const HuffmanTreeNode* node = tree->root_;
|
||||
assert(node != NULL);
|
||||
while (!HuffmanTreeNodeIsLeaf(node)) {
|
||||
node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
|
||||
}
|
||||
return node->symbol_;
|
||||
const HuffmanTreeNode* node = tree->root_;
|
||||
int num_bits = 0;
|
||||
uint32_t bits;
|
||||
bits = VP8LPrefetchBits(br);
|
||||
assert(node != NULL);
|
||||
while (!HuffmanTreeNodeIsLeaf(node)) {
|
||||
node = HuffmanTreeNextNode(node, bits & 1);
|
||||
bits >>= 1;
|
||||
++num_bits;
|
||||
}
|
||||
VP8LDiscardBits(br, num_bits);
|
||||
return node->symbol_;
|
||||
}
|
||||
|
||||
static int ReadHuffmanCodeLengths(
|
||||
|
@ -113,6 +113,10 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
|
||||
|
||||
#define MAX_NUM_BIT_READ 25
|
||||
|
||||
#define LBITS 64 // Number of bits prefetched.
|
||||
#define WBITS 32 // Minimum number of bytes needed after VP8LFillBitWindow.
|
||||
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
|
||||
|
||||
static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
|
||||
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
|
||||
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
|
||||
@ -134,7 +138,7 @@ void VP8LInitBitReader(VP8LBitReader* const br,
|
||||
br->eos_ = 0;
|
||||
br->error_ = 0;
|
||||
for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
|
||||
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
|
||||
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i);
|
||||
++br->pos_;
|
||||
}
|
||||
}
|
||||
@ -149,91 +153,56 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
|
||||
br->len_ = len;
|
||||
}
|
||||
|
||||
// If not at EOS, reload up to LBITS byte-by-byte
|
||||
static void ShiftBytes(VP8LBitReader* const br) {
|
||||
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
|
||||
br->val_ >>= 8;
|
||||
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
|
||||
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8);
|
||||
++br->pos_;
|
||||
br->bit_pos_ -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LFillBitWindow(VP8LBitReader* const br) {
|
||||
if (br->bit_pos_ >= 32) {
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
if (br->pos_ + 8 < br->len_) {
|
||||
br->val_ >>= 32;
|
||||
if (br->bit_pos_ >= WBITS) {
|
||||
#if (defined(__x86_64__) || defined(_M_X64))
|
||||
if (br->pos_ + sizeof(br->val_) < br->len_) {
|
||||
br->val_ >>= WBITS;
|
||||
br->bit_pos_ -= WBITS;
|
||||
// The expression below needs a little-endian arch to work correctly.
|
||||
// This gives a large speedup for decoding speed.
|
||||
br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32;
|
||||
br->pos_ += 4;
|
||||
br->bit_pos_ -= 32;
|
||||
} else {
|
||||
// Slow path.
|
||||
ShiftBytes(br);
|
||||
br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
|
||||
br->pos_ += LOG8_WBITS;
|
||||
return;
|
||||
}
|
||||
#else
|
||||
// Always the slow path.
|
||||
ShiftBytes(br);
|
||||
#endif
|
||||
}
|
||||
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
|
||||
br->eos_ = 1;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
|
||||
const uint32_t val = (uint32_t)((br->val_ >> br->bit_pos_) & 1);
|
||||
// Flag an error at end_of_stream.
|
||||
if (!br->eos_) {
|
||||
++br->bit_pos_;
|
||||
if (br->bit_pos_ >= 32) {
|
||||
ShiftBytes(br);
|
||||
}
|
||||
// After this last bit is read, check if eos needs to be flagged.
|
||||
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
|
||||
ShiftBytes(br); // Slow path.
|
||||
if (br->pos_ == br->len_ && br->bit_pos_ == LBITS) {
|
||||
br->eos_ = 1;
|
||||
}
|
||||
} else {
|
||||
br->error_ = 1;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
|
||||
uint32_t val = 0;
|
||||
assert(n_bits >= 0);
|
||||
// Flag an error if end_of_stream or n_bits is more than allowed limit.
|
||||
if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
|
||||
const uint32_t val =
|
||||
(uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
|
||||
const int new_bits = br->bit_pos_ + n_bits;
|
||||
br->bit_pos_ = new_bits;
|
||||
// If this read is going to cross the read buffer, set the eos flag.
|
||||
if (br->pos_ == br->len_) {
|
||||
if ((br->bit_pos_ + n_bits) >= 64) {
|
||||
if (new_bits >= LBITS) {
|
||||
br->eos_ = 1;
|
||||
if ((br->bit_pos_ + n_bits) > 64) return val;
|
||||
}
|
||||
}
|
||||
val = (uint32_t)((br->val_ >> br->bit_pos_) & kBitMask[n_bits]);
|
||||
br->bit_pos_ += n_bits;
|
||||
if (br->bit_pos_ >= 40) {
|
||||
if (br->pos_ + 5 < br->len_) {
|
||||
br->val_ >>= 40;
|
||||
br->val_ |=
|
||||
(((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
|
||||
(((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
|
||||
(((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
|
||||
(((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
|
||||
(((uint64_t)br->buf_[br->pos_ + 4]) << 56);
|
||||
br->pos_ += 5;
|
||||
br->bit_pos_ -= 40;
|
||||
}
|
||||
if (br->bit_pos_ >= 8) {
|
||||
ShiftBytes(br);
|
||||
}
|
||||
}
|
||||
ShiftBytes(br);
|
||||
return val;
|
||||
} else {
|
||||
br->error_ = 1;
|
||||
return 0;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -258,14 +258,16 @@ static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
|
||||
// -----------------------------------------------------------------------------
|
||||
// Bitreader for lossless format
|
||||
|
||||
typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit.
|
||||
|
||||
typedef struct {
|
||||
uint64_t val_;
|
||||
const uint8_t* buf_;
|
||||
size_t len_;
|
||||
size_t pos_;
|
||||
int bit_pos_;
|
||||
int eos_;
|
||||
int error_;
|
||||
vp8l_val_t val_; // pre-fetched bits
|
||||
const uint8_t* buf_; // input byte buffer
|
||||
size_t len_; // buffer length
|
||||
size_t pos_; // byte position in buf_
|
||||
int bit_pos_; // current bit-reading position in val_
|
||||
int eos_; // bitstream is finished
|
||||
int error_; // an error occurred (buffer overflow attempt...)
|
||||
} VP8LBitReader;
|
||||
|
||||
void VP8LInitBitReader(VP8LBitReader* const br,
|
||||
@ -281,17 +283,14 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
|
||||
// Flags eos if this read attempt is going to cross the read buffer.
|
||||
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
|
||||
|
||||
// Reads one bit from Read Buffer. Flags an error in case end_of_stream.
|
||||
// Flags eos after reading last bit from the buffer.
|
||||
uint32_t VP8LReadOneBit(VP8LBitReader* const br);
|
||||
// Return the prefetched bits, so they can be looked up.
|
||||
static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
|
||||
return (uint32_t)(br->val_ >> br->bit_pos_);
|
||||
}
|
||||
|
||||
// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
|
||||
// 32 times after the last VP8LFillBitWindow. Any subsequent calls
|
||||
// (without VP8LFillBitWindow) will return invalid data.
|
||||
static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
|
||||
const uint32_t val = (uint32_t)((br->val_ >> br->bit_pos_) & 1);
|
||||
++br->bit_pos_;
|
||||
return val;
|
||||
// Discard 'num_bits' bits from the cache.
|
||||
static WEBP_INLINE void VP8LDiscardBits(VP8LBitReader* const br, int num_bits) {
|
||||
br->bit_pos_ += num_bits;
|
||||
}
|
||||
|
||||
// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
|
||||
|
Loading…
Reference in New Issue
Block a user