vp9: coefficient context reading optimization.
This commit is contained in:

committed by
Clément Bœsch

parent
9aeca1c572
commit
cdfcd06126
@@ -2057,14 +2057,20 @@ static void decode_coeffs(AVCodecContext *ctx)
|
|||||||
const int16_t *y_band_counts = band_counts[b->tx];
|
const int16_t *y_band_counts = band_counts[b->tx];
|
||||||
const int16_t *uv_band_counts = band_counts[b->uvtx];
|
const int16_t *uv_band_counts = band_counts[b->uvtx];
|
||||||
|
|
||||||
|
#define MERGE(la, end, step, rd) \
|
||||||
|
for (n = 0; n < end; n += step) \
|
||||||
|
la[n] = !!rd(&la[n])
|
||||||
|
#define MERGE_CTX(step, rd) \
|
||||||
|
do { \
|
||||||
|
MERGE(l, end_y, step, rd); \
|
||||||
|
MERGE(a, end_x, step, rd); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
/* y tokens */
|
/* y tokens */
|
||||||
if (b->tx > TX_4X4) { // FIXME slow
|
switch (b->tx) {
|
||||||
for (y = 0; y < end_y; y += step1d)
|
case TX_8X8: MERGE_CTX(2, AV_RN16A); break;
|
||||||
for (x = 1; x < step1d; x++)
|
case TX_16X16: MERGE_CTX(4, AV_RN32A); break;
|
||||||
l[y] |= l[y + x];
|
case TX_32X32: MERGE_CTX(8, AV_RN64A); break;
|
||||||
for (x = 0; x < end_x; x += step1d)
|
|
||||||
for (y = 1; y < step1d; y++)
|
|
||||||
a[x] |= a[x + y];
|
|
||||||
}
|
}
|
||||||
for (n = 0, y = 0; y < end_y; y += step1d) {
|
for (n = 0, y = 0; y < end_y; y += step1d) {
|
||||||
for (x = 0; x < end_x; x += step1d, n += step) {
|
for (x = 0; x < end_x; x += step1d, n += step) {
|
||||||
@@ -2100,13 +2106,10 @@ static void decode_coeffs(AVCodecContext *ctx)
|
|||||||
for (pl = 0; pl < 2; pl++) {
|
for (pl = 0; pl < 2; pl++) {
|
||||||
a = &s->above_uv_nnz_ctx[pl][col];
|
a = &s->above_uv_nnz_ctx[pl][col];
|
||||||
l = &s->left_uv_nnz_ctx[pl][row & 7];
|
l = &s->left_uv_nnz_ctx[pl][row & 7];
|
||||||
if (b->uvtx > TX_4X4) { // FIXME slow
|
switch (b->uvtx) {
|
||||||
for (y = 0; y < end_y; y += uvstep1d)
|
case TX_8X8: MERGE_CTX(2, AV_RN16A); break;
|
||||||
for (x = 1; x < uvstep1d; x++)
|
case TX_16X16: MERGE_CTX(4, AV_RN32A); break;
|
||||||
l[y] |= l[y + x];
|
case TX_32X32: MERGE_CTX(8, AV_RN64A); break;
|
||||||
for (x = 0; x < end_x; x += uvstep1d)
|
|
||||||
for (y = 1; y < uvstep1d; y++)
|
|
||||||
a[x] |= a[x + y];
|
|
||||||
}
|
}
|
||||||
for (n = 0, y = 0; y < end_y; y += uvstep1d) {
|
for (n = 0, y = 0; y < end_y; y += uvstep1d) {
|
||||||
for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
|
for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
|
||||||
|
Reference in New Issue
Block a user