VP8LBitWriter: use a bit-accumulator
* simplify the endian logic
* remove the need for memset()
* write 16 or 32 bits at a time (likely aligned)

Makes the code a bit faster on ARM (~1%)

Change-Id: I650bc5654e8d0b0454318b7a78206b301c5f6c2c
This commit is contained in:
parent
3f40b4a581
commit
bf182e837e
@ -194,19 +194,54 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
|
||||
//------------------------------------------------------------------------------
|
||||
// VP8LBitWriter
|
||||
|
||||
// This is the minimum number of bytes by which the memory buffer is guaranteed to grow
|
||||
// when extra space is needed.
|
||||
#define MIN_EXTRA_SIZE (32768ULL)
|
||||
|
||||
#define VP8L_WRITER_BYTES ((int)sizeof(vp8l_wtype_t))
|
||||
#define VP8L_WRITER_BITS (VP8L_WRITER_BYTES * 8)
|
||||
|
||||
// endian-specific htoleXX() definition
|
||||
// TODO(skal): move this to config.h, and collect all the endian-related code
|
||||
// in a proper .h file
|
||||
#if defined(_WIN32)
|
||||
#if !defined(_M_PPC)
|
||||
#define htole32(x) (x)
|
||||
#define htole16(x) (x)
|
||||
#else // PPC is BIG_ENDIAN
|
||||
#include <stdlib.h>
|
||||
#define htole32(x) (_byteswap_ulong((unsigned long)(x)))
|
||||
#define htole16(x) (_byteswap_ushort((unsigned short)(x)))
|
||||
#endif // _M_PPC
|
||||
#elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || \
|
||||
defined(__DragonFly__)
|
||||
#include <sys/endian.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <libkern/OSByteOrder.h>
|
||||
#define htole32 OSSwapHostToLittleInt32
|
||||
#define htole16 OSSwapHostToLittleInt16
|
||||
#elif defined(__native_client__) && !defined(__GLIBC__)
|
||||
// NaCl without glibc is assumed to be little-endian
|
||||
#define htole32(x) (x)
|
||||
#define htole16(x) (x)
|
||||
#else // pretty much all linux and/or glibc
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
// Returns 1 on success.
|
||||
static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
|
||||
uint8_t* allocated_buf;
|
||||
size_t allocated_size;
|
||||
const size_t current_size = VP8LBitWriterNumBytes(bw);
|
||||
const size_t max_bytes = bw->end_ - bw->buf_;
|
||||
const size_t current_size = bw->cur_ - bw->buf_;
|
||||
const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
|
||||
const size_t size_required = (size_t)size_required_64b;
|
||||
if (size_required != size_required_64b) {
|
||||
bw->error_ = 1;
|
||||
return 0;
|
||||
}
|
||||
if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1;
|
||||
allocated_size = (3 * bw->max_bytes_) >> 1;
|
||||
if (max_bytes > 0 && size_required <= max_bytes) return 1;
|
||||
allocated_size = (3 * max_bytes) >> 1;
|
||||
if (allocated_size < size_required) allocated_size = size_required;
|
||||
// make allocated size multiple of 1k
|
||||
allocated_size = (((allocated_size >> 10) + 1) << 10);
|
||||
@ -215,11 +250,13 @@ static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
|
||||
bw->error_ = 1;
|
||||
return 0;
|
||||
}
|
||||
memcpy(allocated_buf, bw->buf_, current_size);
|
||||
if (current_size > 0) {
|
||||
memcpy(allocated_buf, bw->buf_, current_size);
|
||||
}
|
||||
free(bw->buf_);
|
||||
bw->buf_ = allocated_buf;
|
||||
bw->max_bytes_ = allocated_size;
|
||||
memset(allocated_buf + current_size, 0, allocated_size - current_size);
|
||||
bw->cur_ = bw->buf_ + current_size;
|
||||
bw->end_ = bw->buf_ + allocated_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -236,46 +273,37 @@ void VP8LBitWriterDestroy(VP8LBitWriter* const bw) {
|
||||
}
|
||||
|
||||
void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
|
||||
if (n_bits < 1) return;
|
||||
#if !defined(__BIG_ENDIAN__)
|
||||
// Technically, this branch of the code can write up to 25 bits at a time,
|
||||
// but in prefix encoding, the maximum number of bits written is 18 at a time.
|
||||
{
|
||||
uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3];
|
||||
uint32_t v = *(const uint32_t*)p;
|
||||
v |= bits << (bw->bit_pos_ & 7);
|
||||
*(uint32_t*)p = v;
|
||||
bw->bit_pos_ += n_bits;
|
||||
}
|
||||
#else // BIG_ENDIAN
|
||||
{
|
||||
uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
|
||||
const int bits_reserved_in_first_byte = bw->bit_pos_ & 7;
|
||||
const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
|
||||
// implicit & 0xff is assumed for uint8_t arithmetic
|
||||
*p++ |= bits << bits_reserved_in_first_byte;
|
||||
bits >>= 8 - bits_reserved_in_first_byte;
|
||||
if (bits_left_to_write >= 1) {
|
||||
*p++ = bits;
|
||||
bits >>= 8;
|
||||
if (bits_left_to_write >= 9) {
|
||||
*p++ = bits;
|
||||
bits >>= 8;
|
||||
if (n_bits <= 0) return;
|
||||
bw->bits_ |= (vp8l_atype_t)bits << bw->used_;
|
||||
bw->used_ += n_bits;
|
||||
if (bw->used_ > VP8L_WRITER_BITS) {
|
||||
if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
|
||||
const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
|
||||
if (extra_size != (size_t)extra_size ||
|
||||
!VP8LBitWriterResize(bw, (size_t)extra_size)) {
|
||||
bw->cur_ = bw->buf_;
|
||||
bw->error_ = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
assert(n_bits <= 25);
|
||||
*p = bits;
|
||||
bw->bit_pos_ += n_bits;
|
||||
*(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)bw->bits_);
|
||||
bw->cur_ += VP8L_WRITER_BYTES;
|
||||
bw->bits_ >>= VP8L_WRITER_BITS;
|
||||
bw->used_ -= VP8L_WRITER_BITS;
|
||||
}
|
||||
#endif
|
||||
if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) {
|
||||
const uint64_t extra_size = 32768ULL + bw->max_bytes_;
|
||||
if (extra_size != (size_t)extra_size ||
|
||||
!VP8LBitWriterResize(bw, (size_t)extra_size)) {
|
||||
bw->bit_pos_ = 0;
|
||||
bw->error_ = 1;
|
||||
}
|
||||
|
||||
// Flushes the bits still pending in the accumulator (bw->bits_) into the
// output buffer and returns the start of that buffer (bw->buf_).
//
// Room for the pending bytes is requested through VP8LBitWriterResize()
// first. If that call fails, nothing is flushed and bw->buf_ is returned
// unchanged; the caller is expected to inspect bw->error_ afterwards.
// On success the accumulator is left empty (bw->used_ == 0) and bw->cur_
// points just past the last byte written.
uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
  // Number of whole bytes needed to hold the pending bits (rounded up).
  const size_t pending_bytes = (size_t)((bw->used_ + 7) >> 3);

  if (!VP8LBitWriterResize(bw, pending_bytes)) {
    return bw->buf_;
  }

  // Emit the accumulator one byte at a time, least-significant byte first.
  for (; bw->used_ > 0; bw->used_ -= 8) {
    *bw->cur_++ = (uint8_t)bw->bits_;  // keep only the low 8 bits
    bw->bits_ >>= 8;
  }
  // The last step may have driven used_ below zero when the bit count was
  // not a multiple of 8; normalize it.
  bw->used_ = 0;

  return bw->buf_;
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -68,51 +68,45 @@ static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VP8LBitWriter
|
||||
// TODO(vikasa): VP8LBitWriter is copied as-is from lossless code. There's scope
|
||||
// for re-using VP8BitWriter. Will evaluate once the basic lossless encoder is
|
||||
// implemented.
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) // 64bit
|
||||
typedef uint64_t vp8l_atype_t; // accumulator type
|
||||
typedef uint32_t vp8l_wtype_t; // writing type
|
||||
#define WSWAP htole32
|
||||
#else
|
||||
typedef uint32_t vp8l_atype_t;
|
||||
typedef uint16_t vp8l_wtype_t;
|
||||
#define WSWAP htole16
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint8_t* buf_;
|
||||
size_t bit_pos_;
|
||||
size_t max_bytes_;
|
||||
vp8l_atype_t bits_; // bit accumulator
|
||||
int used_; // number of bits used in accumulator
|
||||
uint8_t* buf_; // start of buffer
|
||||
uint8_t* cur_; // current write position
|
||||
uint8_t* end_; // end of buffer
|
||||
|
||||
// After all bits are written, the caller must observe the state of
|
||||
// error_. A value of 1 indicates that a memory allocation failure
|
||||
// has happened during bit writing. A value of 0 indicates successful
|
||||
// After all bits are written (VP8LBitWriterFinish()), the caller must observe
|
||||
// the state of error_. A value of 1 indicates that a memory allocation
|
||||
// failure has happened during bit writing. A value of 0 indicates successful
|
||||
// writing of bits.
|
||||
int error_;
|
||||
} VP8LBitWriter;
|
||||
|
||||
static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
|
||||
return (bw->bit_pos_ + 7) >> 3;
|
||||
return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
|
||||
return bw->buf_;
|
||||
}
|
||||
uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
|
||||
|
||||
// Returns 0 in case of memory allocation error.
|
||||
int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
|
||||
|
||||
void VP8LBitWriterDestroy(VP8LBitWriter* const bw);
|
||||
|
||||
// This function writes bits into bytes in increasing addresses, and within
|
||||
// a byte least-significant-bit first.
|
||||
//
|
||||
// The function can write up to 16 bits in one go with WriteBits
|
||||
// Example: let's assume that 3 bits (Rs below) have been written already:
|
||||
//
|
||||
// BYTE-0 BYTE+1 BYTE+2
|
||||
//
|
||||
// 0000 0RRR 0000 0000 0000 0000
|
||||
//
|
||||
// Now, we could write 5 or fewer bits in MSB by just shifting by 3
|
||||
// and OR'ing to BYTE-0.
|
||||
//
|
||||
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
|
||||
// and locate the rest in BYTE+1 and BYTE+2.
|
||||
//
|
||||
// This function writes bits into bytes in increasing addresses (little endian),
|
||||
// and within a byte least-significant-bit first.
|
||||
// The function can write up to 8*sizeof(vp8l_wtype_t) bits in one go.
|
||||
// VP8LBitWriter's error_ flag is set in case of memory allocation error.
|
||||
void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user