Merge "remove the PACK() bit-packing tricks"
This commit is contained in:
commit
7e2d65950f
@ -595,6 +595,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
// predict and add residuals
|
// predict and add residuals
|
||||||
if (block->is_i4x4_) { // 4x4
|
if (block->is_i4x4_) { // 4x4
|
||||||
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
|
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
|
||||||
|
uint32_t bits = (block->non_zero_ & 0xffff) | (block->non_zero_ac_ << 16);
|
||||||
|
|
||||||
if (dec->mb_y_ > 0) {
|
if (dec->mb_y_ > 0) {
|
||||||
if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border
|
if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border
|
||||||
@ -607,25 +608,26 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
|
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
|
||||||
|
|
||||||
// predict and add residuals for all 4x4 blocks in turn.
|
// predict and add residuals for all 4x4 blocks in turn.
|
||||||
for (n = 0; n < 16; n++) {
|
for (n = 0; n < 16; ++n, bits <<= 1) {
|
||||||
uint8_t* const dst = y_dst + kScan[n];
|
uint8_t* const dst = y_dst + kScan[n];
|
||||||
VP8PredLuma4[block->imodes_[n]](dst);
|
VP8PredLuma4[block->imodes_[n]](dst);
|
||||||
if (block->non_zero_ac_ & (1 << n)) {
|
if (bits & (1UL << 31)) {
|
||||||
VP8Transform(coeffs + n * 16, dst, 0);
|
VP8Transform(coeffs + n * 16, dst, 0);
|
||||||
} else if (block->non_zero_ & (1 << n)) { // only DC is present
|
} else if (bits & (1UL << 15)) { // only DC is present
|
||||||
VP8TransformDC(coeffs + n * 16, dst);
|
VP8TransformDC(coeffs + n * 16, dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else { // 16x16
|
} else { // 16x16
|
||||||
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_,
|
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_,
|
||||||
block->imodes_[0]);
|
block->imodes_[0]);
|
||||||
|
uint32_t bits = (block->non_zero_ & 0xffff) | (block->non_zero_ac_ << 16);
|
||||||
VP8PredLuma16[pred_func](y_dst);
|
VP8PredLuma16[pred_func](y_dst);
|
||||||
if (block->non_zero_ & 0xffff) {
|
if (bits & 0xffff) {
|
||||||
for (n = 0; n < 16; n++) {
|
for (n = 0; n < 16; ++n, bits <<= 1) {
|
||||||
uint8_t* const dst = y_dst + kScan[n];
|
uint8_t* const dst = y_dst + kScan[n];
|
||||||
if (block->non_zero_ac_ & (1 << n)) {
|
if (bits & (1UL << 31)) {
|
||||||
VP8Transform(coeffs + n * 16, dst, 0);
|
VP8Transform(coeffs + n * 16, dst, 0);
|
||||||
} else if (block->non_zero_ & (1 << n)) { // only DC is present
|
} else if (bits & (1UL << 15)) { // only DC is present
|
||||||
VP8TransformDC(coeffs + n * 16, dst);
|
VP8TransformDC(coeffs + n * 16, dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -505,28 +505,6 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
|
|||||||
return 16;
|
return 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Alias-safe way of converting 4bytes to 32bits.
|
|
||||||
typedef union {
|
|
||||||
uint8_t i8[4];
|
|
||||||
uint32_t i32;
|
|
||||||
} PackedNz;
|
|
||||||
|
|
||||||
// Table to unpack four bits into four bytes
|
|
||||||
static const PackedNz kUnpackTab[16] = {
|
|
||||||
{{0, 0, 0, 0}}, {{1, 0, 0, 0}}, {{0, 1, 0, 0}}, {{1, 1, 0, 0}},
|
|
||||||
{{0, 0, 1, 0}}, {{1, 0, 1, 0}}, {{0, 1, 1, 0}}, {{1, 1, 1, 0}},
|
|
||||||
{{0, 0, 0, 1}}, {{1, 0, 0, 1}}, {{0, 1, 0, 1}}, {{1, 1, 0, 1}},
|
|
||||||
{{0, 0, 1, 1}}, {{1, 0, 1, 1}}, {{0, 1, 1, 1}}, {{1, 1, 1, 1}} };
|
|
||||||
|
|
||||||
// Macro to pack four LSB of four bytes into four bits.
|
|
||||||
#if defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) || \
|
|
||||||
defined(__BIG_ENDIAN__)
|
|
||||||
#define PACK_CST 0x08040201U
|
|
||||||
#else
|
|
||||||
#define PACK_CST 0x01020408U
|
|
||||||
#endif
|
|
||||||
#define PACK(X, S) ((((X).i32 * PACK_CST) & 0xff000000) >> (S))
|
|
||||||
|
|
||||||
static int ParseResiduals(VP8Decoder* const dec,
|
static int ParseResiduals(VP8Decoder* const dec,
|
||||||
VP8MB* const mb, VP8BitReader* const token_br) {
|
VP8MB* const mb, VP8BitReader* const token_br) {
|
||||||
uint32_t out_t_nz, out_l_nz;
|
uint32_t out_t_nz, out_l_nz;
|
||||||
@ -536,8 +514,7 @@ static int ParseResiduals(VP8Decoder* const dec,
|
|||||||
VP8MBData* const block = dec->mb_data_;
|
VP8MBData* const block = dec->mb_data_;
|
||||||
int16_t* dst = block->coeffs_;
|
int16_t* dst = block->coeffs_;
|
||||||
VP8MB* const left_mb = dec->mb_info_ - 1;
|
VP8MB* const left_mb = dec->mb_info_ - 1;
|
||||||
PackedNz nz_ac, nz_dc;
|
uint8_t tnz, lnz;
|
||||||
PackedNz tnz, lnz;
|
|
||||||
uint32_t non_zero_ac = 0;
|
uint32_t non_zero_ac = 0;
|
||||||
uint32_t non_zero_dc = 0;
|
uint32_t non_zero_dc = 0;
|
||||||
int x, y, ch;
|
int x, y, ch;
|
||||||
@ -557,48 +534,55 @@ static int ParseResiduals(VP8Decoder* const dec,
|
|||||||
ac_prob = (ProbaArray)dec->proba_.coeffs_[3];
|
ac_prob = (ProbaArray)dec->proba_.coeffs_[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
tnz = kUnpackTab[mb->nz_ & 0xf];
|
tnz = mb->nz_ & 0x0f;
|
||||||
lnz = kUnpackTab[left_mb->nz_ & 0xf];
|
lnz = left_mb->nz_ & 0x0f;
|
||||||
for (y = 0; y < 4; ++y) {
|
for (y = 0; y < 4; ++y) {
|
||||||
int l = lnz.i8[y];
|
int l = lnz & 1;
|
||||||
|
uint32_t nz_dc = 0, nz_ac = 0;
|
||||||
for (x = 0; x < 4; ++x) {
|
for (x = 0; x < 4; ++x) {
|
||||||
const int ctx = l + tnz.i8[x];
|
const int ctx = l + (tnz & 1);
|
||||||
const int nz = GetCoeffs(token_br, ac_prob, ctx,
|
const int nz = GetCoeffs(token_br, ac_prob, ctx,
|
||||||
q->y1_mat_, first, dst);
|
q->y1_mat_, first, dst);
|
||||||
tnz.i8[x] = l = (nz > 0);
|
l = (nz > 0);
|
||||||
nz_dc.i8[x] = (dst[0] != 0);
|
tnz = (tnz >> 1) | (l << 7);
|
||||||
nz_ac.i8[x] = (nz > 1);
|
nz_dc = (nz_dc << 1) | (dst[0] != 0);
|
||||||
|
nz_ac = (nz_ac << 1) | (nz > 1);
|
||||||
dst += 16;
|
dst += 16;
|
||||||
}
|
}
|
||||||
lnz.i8[y] = l;
|
tnz >>= 4;
|
||||||
non_zero_dc |= PACK(nz_dc, 24 - y * 4);
|
lnz = (lnz >> 1) | (l << 7);
|
||||||
non_zero_ac |= PACK(nz_ac, 24 - y * 4);
|
non_zero_dc = (non_zero_dc << 4) | nz_dc;
|
||||||
|
non_zero_ac = (non_zero_ac << 4) | nz_ac;
|
||||||
}
|
}
|
||||||
out_t_nz = PACK(tnz, 24);
|
out_t_nz = tnz;
|
||||||
out_l_nz = PACK(lnz, 24);
|
out_l_nz = lnz >> 4;
|
||||||
|
|
||||||
tnz = kUnpackTab[mb->nz_ >> 4];
|
|
||||||
lnz = kUnpackTab[left_mb->nz_ >> 4];
|
|
||||||
for (ch = 0; ch < 4; ch += 2) {
|
for (ch = 0; ch < 4; ch += 2) {
|
||||||
|
uint32_t nz_dc = 0, nz_ac = 0;
|
||||||
|
tnz = mb->nz_ >> (4 + ch);
|
||||||
|
lnz = left_mb->nz_ >> (4 + ch);
|
||||||
for (y = 0; y < 2; ++y) {
|
for (y = 0; y < 2; ++y) {
|
||||||
int l = lnz.i8[ch + y];
|
int l = lnz & 1;
|
||||||
for (x = 0; x < 2; ++x) {
|
for (x = 0; x < 2; ++x) {
|
||||||
const int ctx = l + tnz.i8[ch + x];
|
const int ctx = l + (tnz & 1);
|
||||||
const int nz =
|
const int nz =
|
||||||
GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[2],
|
GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[2],
|
||||||
ctx, q->uv_mat_, 0, dst);
|
ctx, q->uv_mat_, 0, dst);
|
||||||
tnz.i8[ch + x] = l = (nz > 0);
|
l = (nz > 0);
|
||||||
nz_dc.i8[y * 2 + x] = (dst[0] != 0);
|
tnz = (tnz >> 1) | (l << 3);
|
||||||
nz_ac.i8[y * 2 + x] = (nz > 1);
|
nz_dc = (nz_dc << 1) | (dst[0] != 0);
|
||||||
|
nz_ac = (nz_ac << 1) | (nz > 1);
|
||||||
dst += 16;
|
dst += 16;
|
||||||
}
|
}
|
||||||
lnz.i8[ch + y] = l;
|
tnz >>= 2;
|
||||||
|
lnz = (lnz >> 1) | (l << 5);
|
||||||
}
|
}
|
||||||
non_zero_dc |= PACK(nz_dc, 8 - ch * 2);
|
// Note: we don't really need the per-4x4 details for U/V blocks.
|
||||||
non_zero_ac |= PACK(nz_ac, 8 - ch * 2);
|
non_zero_dc |= (nz_dc & 0x0f) << (16 + 2 * ch);
|
||||||
|
non_zero_ac |= (nz_ac & 0x0f) << (16 + 2 * ch);
|
||||||
|
out_t_nz |= (tnz << 4) << ch;
|
||||||
|
out_l_nz |= (lnz & 0xf0) << ch;
|
||||||
}
|
}
|
||||||
out_t_nz |= PACK(tnz, 20);
|
|
||||||
out_l_nz |= PACK(lnz, 20);
|
|
||||||
mb->nz_ = out_t_nz;
|
mb->nz_ = out_t_nz;
|
||||||
left_mb->nz_ = out_l_nz;
|
left_mb->nz_ = out_l_nz;
|
||||||
|
|
||||||
@ -606,7 +590,6 @@ static int ParseResiduals(VP8Decoder* const dec,
|
|||||||
block->non_zero_ = non_zero_ac | non_zero_dc;
|
block->non_zero_ = non_zero_ac | non_zero_dc;
|
||||||
return !block->non_zero_; // will be used for further optimization
|
return !block->non_zero_; // will be used for further optimization
|
||||||
}
|
}
|
||||||
#undef PACK
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Main loop
|
// Main loop
|
||||||
|
@ -177,8 +177,8 @@ typedef struct {
|
|||||||
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
|
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
|
||||||
uint8_t uvmode_; // chroma prediction mode
|
uint8_t uvmode_; // chroma prediction mode
|
||||||
// bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
|
// bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
|
||||||
// for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
|
// for luma (bits #15->#0), then 4 bits for chroma-u (#19->#16) and 4 bits for
|
||||||
// chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
|
// chroma-v (#23->#20), each corresponding to one 4x4 block in decoding order.
|
||||||
// If the bit is set, the 4x4 block contains some non-zero coefficients.
|
// If the bit is set, the 4x4 block contains some non-zero coefficients.
|
||||||
uint32_t non_zero_;
|
uint32_t non_zero_;
|
||||||
uint32_t non_zero_ac_;
|
uint32_t non_zero_ac_;
|
||||||
|
Loading…
Reference in New Issue
Block a user