diff --git a/vp9/common/generic/systemdependent.c b/vp9/common/generic/systemdependent.c index a3d6cb478..caf625795 100644 --- a/vp9/common/generic/systemdependent.c +++ b/vp9/common/generic/systemdependent.c @@ -29,10 +29,11 @@ void vp9_machine_specific_config(VP9_COMMON *ctx) { rtcd->idct.iwalsh1 = vp9_short_inv_walsh4x4_1_c; rtcd->idct.iwalsh16 = vp9_short_inv_walsh4x4_c; rtcd->idct.idct8 = vp9_short_idct8x8_c; + rtcd->idct.idct10_8 = vp9_short_idct10_8x8_c; rtcd->idct.idct1_scalar_add_8x8 = vp9_dc_only_idct_add_8x8_c; rtcd->idct.ihaar2 = vp9_short_ihaar2x2_c; rtcd->idct.idct16x16 = vp9_short_idct16x16_c; - rtcd->idct.idct10_16x16 = vp9_short_idct10_16x16_c; + rtcd->idct.idct10_16x16 = vp9_short_idct10_16x16_c; rtcd->subpix.eighttap16x16 = vp9_eighttap_predict16x16_c; rtcd->subpix.eighttap8x8 = vp9_eighttap_predict8x8_c; diff --git a/vp9/common/idct.h b/vp9/common/idct.h index b8d3121b6..0f0478cd5 100644 --- a/vp9/common/idct.h +++ b/vp9/common/idct.h @@ -60,6 +60,11 @@ extern prototype_idct(vp9_idct_idct10_16x16); #endif extern prototype_idct(vp9_idct_idct8); +#ifndef vp9_idct_idct10_8 +#define vp9_idct_idct10_8 vp9_short_idct10_8x8_c +#endif +extern prototype_idct(vp9_idct_idct10_8); + #ifndef vp9_idct_idct8_1 #define vp9_idct_idct8_1 vp9_short_idct8x8_1_c #endif @@ -132,6 +137,7 @@ typedef struct { vp9_second_order_fn_t iwalsh16; vp9_idct_fn_t idct8; + vp9_idct_fn_t idct10_8; vp9_idct_fn_t idct8_1; vp9_idct_scalar_add_fn_t idct1_scalar_add_8x8; vp9_idct_fn_t ihaar2; diff --git a/vp9/common/idctllm.c b/vp9/common/idctllm.c index 3efc094e3..aa5665473 100644 --- a/vp9/common/idctllm.c +++ b/vp9/common/idctllm.c @@ -967,6 +967,127 @@ void vp9_short_idct8x8_c(short *coefs, short *block, int pitch) { } } +/* Row IDCT when only first 4 coefficients are non-zero. */ +static void idctrow10(int *blk) { + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) | + (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) { + blk[0] = blk[1] = blk[2] = blk[3] = blk[4] + = blk[5] = blk[6] = blk[7] = blk[0] << 3; + return; + } + + x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */ + /* first stage */ + x5 = W7 * x4; + x4 = W1 * x4; + x6 = W3 * x7; + x7 = -W5 * x7; + + /* second stage */ + x2 = W6 * x3; + x3 = W2 * x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x0 + x3; + x8 = x0 - x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[0] = (x7 + x1) >> 8; + blk[1] = (x3 + x2) >> 8; + blk[2] = (x0 + x4) >> 8; + blk[3] = (x8 + x6) >> 8; + blk[4] = (x8 - x6) >> 8; + blk[5] = (x0 - x4) >> 8; + blk[6] = (x3 - x2) >> 8; + blk[7] = (x7 - x1) >> 8; +} + +/* Column (vertical) IDCT when only first 4 coefficients are non-zero. */ +static void idctcol10(int *blk) { + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) | + (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | + (x7 = blk[8 * 3]))) { + blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] + = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] + = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6); + return; + } + + x0 = (blk[8 * 0] << 8) + 16384; + + /* first stage */ + x5 = (W7 * x4 + 4) >> 3; + x4 = (W1 * x4 + 4) >> 3; + x6 = (W3 * x7 + 4) >> 3; + x7 = (-W5 * x7 + 4) >> 3; + + /* second stage */ + x2 = (W6 * x3 + 4) >> 3; + x3 = (W2 * x3 + 4) >> 3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x0 + x3; + x8 = x0 - x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[8 * 0] = (x7 + x1) >> 14; + blk[8 * 1] = (x3 + x2) >> 14; + blk[8 * 2] = (x0 + x4) >> 14; + blk[8 * 3] = (x8 + x6) >> 14; + blk[8 * 4] = (x8 - x6) >> 14; + blk[8 * 5] = (x0 - x4) >> 14; + blk[8 * 6] = (x3 - x2) >> 14; + blk[8 * 7] = (x7 - x1) >> 14; +} + +void vp9_short_idct10_8x8_c(short *coefs, short *block, int pitch) { + int X[TX_DIM * TX_DIM]; + int i, j; + int shortpitch = pitch >> 1; + + for (i = 0; i < TX_DIM; i++) { + for (j = 0; j < TX_DIM; j++) { + X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1 + + (coefs[i * TX_DIM + j] < 0)) >> 2; + } + } + + /* Do first 4 row idct only since non-zero dct coefficients are all in + * upper-left 4x4 area. */ + for (i = 0; i < 4; i++) + idctrow10(X + 8 * i); + + for (i = 0; i < 8; i++) + idctcol10(X + i); + + for (i = 0; i < TX_DIM; i++) { + for (j = 0; j < TX_DIM; j++) { + block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1; + } + } +} void vp9_short_ihaar2x2_c(short *input, short *output, int pitch) { int i; diff --git a/vp9/common/rtcd_defs.sh b/vp9/common/rtcd_defs.sh index 8b3ee0c71..4fef04d1e 100644 --- a/vp9/common/rtcd_defs.sh +++ b/vp9/common/rtcd_defs.sh @@ -57,12 +57,9 @@ specialize vp9_dequant_idct_add_uv_block_8x8 prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, unsigned short eobs" specialize vp9_dequant_idct_add_16x16 -prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride" +prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int dc, unsigned short eobs" specialize vp9_dequant_idct_add_8x8 -prototype void vp9_dequant_dc_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc" -specialize vp9_dequant_dc_idct_add_8x8 - prototype void vp9_dequant_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride" specialize vp9_dequant_idct_add diff --git a/vp9/decoder/decodframe.c b/vp9/decoder/decodframe.c index ebbf1669a..959e75f96 100644 --- a/vp9/decoder/decodframe.c +++ b/vp9/decoder/decodframe.c @@ -442,7 +442,8 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride); } else { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, + xd->eobs[idx]); } q += 64; } else { diff --git a/vp9/decoder/dequantize.c b/vp9/decoder/dequantize.c index e56426271..a23f8e9b6 100644 --- a/vp9/decoder/dequantize.c +++ b/vp9/decoder/dequantize.c @@ -19,8 +19,8 @@ extern int dec_debug; #endif -static void recon(int16_t *diff, uint8_t *pred, int pitch, uint8_t *dest, - int stride, int width, int height) { +static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, + uint8_t *dest, int stride, int width, int height) { int r, c; for (r = 0; r < height; r++) { @@ -41,12 +41,34 @@ static void recon(int16_t *diff, uint8_t *pred, int pitch, uint8_t *dest, } } +static void add_constant_residual(const int16_t diff, const uint8_t *pred, + int pitch, uint8_t *dest, int stride, + int width, int height) { + int r, c; + + for (r = 0; r < height; r++) { + for (c = 0; c < width; c++) { + int a = diff + pred[c]; + + if (a < 0) + a = 0; + else if (a > 255) + a = 255; + + dest[c] = (uint8_t) a; + } + + dest += stride; + pred += pitch; + } +} + void vp9_dequantize_b_c(BLOCKD *d) { int i; - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; + int16_t *DQ = d->dqcoeff; + int16_t *Q = d->qcoeff; + int16_t *DQC = d->dequant; for (i = 0; i < 16; i++) { DQ[i] = Q[i] * DQC[i]; @@ -54,11 +76,11 @@ void vp9_dequantize_b_c(BLOCKD *d) { } -void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, - unsigned char *pred, unsigned char *dest, +void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, int16_t *dq, + uint8_t *pred, uint8_t *dest, int pitch, int stride) { - short output[16]; - short *diff_ptr = output; + int16_t output[16]; + int16_t *diff_ptr = output; int i; for (i = 0; i < 16; i++) { @@ -69,18 +91,15 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, vpx_memset(input, 0, 32); - recon(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); } -void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, - unsigned char *pred, unsigned char *dest, +void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, int16_t *dq, + uint8_t *pred, uint8_t *dest, int pitch, int stride) { - short output[64]; - short *diff_ptr = output; - int b, r, c; + int16_t output[64]; + int16_t *diff_ptr = output; int i; - unsigned char *origdest = dest; - unsigned char *origpred = pred; input[0] = dq[0] * input[0]; for (i = 1; i < 64; i++) { @@ -91,35 +110,13 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, vpx_memset(input, 0, 128); - for (b = 0; b < 4; b++) { - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - int a = diff_ptr[c] + pred[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dest[c] = (unsigned char) a; - } - - dest += stride; - diff_ptr += 8; - pred += pitch; - } - // shift buffer pointers to next 4x4 block in the submacroblock - diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4; - dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4; - pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4; - } + add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); } -void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride) { - short output[16]; - short *diff_ptr = output; +void vp9_dequant_idct_add_c(int16_t *input, int16_t *dq, uint8_t *pred, + uint8_t *dest, int pitch, int stride) { + int16_t output[16]; + int16_t *diff_ptr = output; int i; for (i = 0; i < 16; i++) { @@ -131,17 +128,17 @@ void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, vpx_memset(input, 0, 32); - recon(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); } -void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride, +void vp9_dequant_dc_idct_add_c(int16_t *input, int16_t *dq, uint8_t *pred, + uint8_t *dest, int pitch, int stride, int Dc) { int i; - short output[16]; - short *diff_ptr = output; + int16_t output[16]; + int16_t *diff_ptr = output; - input[0] = (short)Dc; + input[0] = (int16_t)Dc; for (i = 1; i < 16; i++) { input[i] = dq[i] * input[i]; @@ -152,15 +149,15 @@ void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, vpx_memset(input, 0, 32); - recon(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); } #if CONFIG_LOSSLESS -void vp9_dequant_idct_add_lossless_c(short *input, short *dq, - unsigned char *pred, unsigned char *dest, +void vp9_dequant_idct_add_lossless_c(int16_t *input, int16_t *dq, + uint8_t *pred, uint8_t *dest, int pitch, int stride) { - short output[16]; - short *diff_ptr = output; + int16_t output[16]; + int16_t *diff_ptr = output; int i; for (i = 0; i < 16; i++) { @@ -171,18 +168,18 @@ void vp9_dequant_idct_add_lossless_c(short *input, short *dq, vpx_memset(input, 0, 32); - recon(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); } -void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq, - unsigned char *pred, - unsigned char *dest, +void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, int16_t *dq, + uint8_t *pred, + uint8_t *dest, int pitch, int stride, int dc) { int i; - short output[16]; - short *diff_ptr = output; + int16_t output[16]; + int16_t *diff_ptr = output; - input[0] = (short)dc; + input[0] = (int16_t)dc; for (i = 1; i < 16; i++) { input[i] = dq[i] * input[i]; @@ -191,18 +188,18 @@ void vp9_dequant_dc_idct_add_lossless_c(short *input, short *dq, vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1); vpx_memset(input, 0, 32); - recon(diff_ptr, pred, pitch, dest, stride, 4, 4); + add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4); } #endif void vp9_dequantize_b_2x2_c(BLOCKD *d) { int i; - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - short *DQC = d->dequant; + int16_t *DQ = d->dqcoeff; + int16_t *Q = d->qcoeff; + int16_t *DQC = d->dequant; for (i = 0; i < 16; i++) { - DQ[i] = (short)((Q[i] * DQC[i])); + DQ[i] = (int16_t)((Q[i] * DQC[i])); } #ifdef DEC_DEBUG if (dec_debug) { @@ -216,14 +213,12 @@ void vp9_dequantize_b_2x2_c(BLOCKD *d) { #endif } -void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride) { - short output[64]; - short *diff_ptr = output; - int r, c, b; +void vp9_dequant_idct_add_8x8_c(int16_t *input, int16_t *dq, uint8_t *pred, + uint8_t *dest, int pitch, int stride, + int dc, uint16_t eobs) { + int16_t output[64]; + int16_t *diff_ptr = output; int i; - unsigned char *origdest = dest; - unsigned char *origpred = pred; #ifdef DEC_DEBUG if (dec_debug) { @@ -236,12 +231,57 @@ void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, } #endif - input[0] = input[0] * dq[0]; + /* If dc is 1, then input[0] is the reconstructed value, do not need + * dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + */ + if (!dc) + input[0] *= dq[0]; - // recover quantizer for 4 4x4 blocks - for (i = 1; i < 64; i++) { - input[i] = input[i] * dq[1]; - } + /* The calculation can be simplified if there are not many non-zero dct + * coefficients. Use eobs to decide what to do. + * TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + * Combine that with code here. + */ + if (eobs == 0) { + /* All 0 DCT coefficient */ + vp9_copy_mem8x8(pred, pitch, dest, stride); + } else if (eobs == 1) { + /* DC only DCT coefficient. */ + int16_t out; + + /* Note: the idct1 will need to be modified accordingly whenever + * vp9_short_idct8x8_c() is modified. */ + out = (input[0] + 1 + (input[0] < 0)) >> 2; + out = out << 3; + out = (out + 32) >> 7; + + input[0] = 0; + + add_constant_residual(out, pred, pitch, dest, stride, 8, 8); + } else if (eobs <= 10) { + input[1] = input[1] * dq[1]; + input[2] = input[2] * dq[1]; + input[3] = input[3] * dq[1]; + input[8] = input[8] * dq[1]; + input[9] = input[9] * dq[1]; + input[10] = input[10] * dq[1]; + input[16] = input[16] * dq[1]; + input[17] = input[17] * dq[1]; + input[24] = input[24] * dq[1]; + + vp9_short_idct10_8x8_c(input, output, 16); + + input[0] = input[1] = input[2] = input[3] = 0; + input[8] = input[9] = input[10] = 0; + input[16] = input[17] = 0; + input[24] = 0; + + add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); + } else { + // recover quantizer for 4 4x4 blocks + for (i = 1; i < 64; i++) { + input[i] = input[i] * dq[1]; + } #ifdef DEC_DEBUG if (dec_debug) { int j; @@ -253,8 +293,8 @@ void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, } #endif - // the idct halves ( >> 1) the pitch - vp9_short_idct8x8_c(input, output, 16); + // the idct halves ( >> 1) the pitch + vp9_short_idct8x8_c(input, output, 16); #ifdef DEC_DEBUG if (dec_debug) { int j; @@ -266,30 +306,10 @@ void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, } #endif - vpx_memset(input, 0, 128);// test what should i put here + vpx_memset(input, 0, 128); - for (b = 0; b < 4; b++) { - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - int a = diff_ptr[c] + pred[c]; + add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dest[c] = (unsigned char) a; - } - - dest += stride; - diff_ptr += 8; - pred += pitch; - } - diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4; - dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4; - pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4; - } #ifdef DEC_DEBUG if (dec_debug) { int k, j; @@ -303,101 +323,14 @@ void vp9_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, } } #endif + } } -void vp9_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, - unsigned char *dest, int pitch, int stride, - int Dc) { // Dc for 1st order T in some rear case - short output[64]; - short *diff_ptr = output; - int r, c, b; - int i; - unsigned char *origdest = dest; - unsigned char *origpred = pred; - - input[0] = (short)Dc;// Dc is the reconstructed value, do not need dequantization - // dc value is recovered after dequantization, since dc need not quantization -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Input 8x8\n"); - for (j = 0; j < 64; j++) { - printf("%d ", input[j]); - if (j % 8 == 7) printf("\n"); - } - } -#endif - for (i = 1; i < 64; i++) { - input[i] = input[i] * dq[1]; - } - -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Input DQ 8x8\n"); - for (j = 0; j < 64; j++) { - printf("%d ", input[j]); - if (j % 8 == 7) printf("\n"); - } - } -#endif - - // the idct halves ( >> 1) the pitch - vp9_short_idct8x8_c(input, output, 16); -#ifdef DEC_DEBUG - if (dec_debug) { - int j; - printf("Output 8x8\n"); - for (j = 0; j < 64; j++) { - printf("%d ", output[j]); - if (j % 8 == 7) printf("\n"); - } - } -#endif - vpx_memset(input, 0, 128); - - for (b = 0; b < 4; b++) { - for (r = 0; r < 4; r++) { - for (c = 0; c < 4; c++) { - int a = diff_ptr[c] + pred[c]; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dest[c] = (unsigned char) a; - } - - dest += stride; - diff_ptr += 8; - pred += pitch; - } - diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4; - dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4; - pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4; - } -#ifdef DEC_DEBUG - if (dec_debug) { - int k, j; - printf("Final 8x8\n"); - for (j = 0; j < 8; j++) { - for (k = 0; k < 8; k++) { - printf("%d ", origdest[k]); - } - printf("\n"); - origdest += stride; - } - } -#endif -} - -void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq, - unsigned char *pred, unsigned char *dest, +void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, + int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride) { - short output[256]; - short *diff_ptr = output; + int16_t output[256]; + int16_t *diff_ptr = output; int i; input[0]= input[0] * dq[0]; @@ -414,7 +347,7 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, short *dq, vpx_memset(input, 0, 512); - recon(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); } void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred, @@ -422,7 +355,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred, uint16_t eobs) { int16_t output[256]; int16_t *diff_ptr = output; - int r, c, i; + int i; /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ @@ -433,28 +366,15 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred, /* DC only DCT coefficient. */ int16_t out; + /* Note: the idct1 will need to be modified accordingly whenever + * vp9_short_idct16x16_c() is modified. */ out = (input[0] * dq[0] + 2) >> 2; out = (out + 2) >> 2; out = (out + 4) >> 3; input[0] = 0; - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) { - int a = out + pred[c]; - - if (a < 0) - a = 0; - else if (a > 255) - a = 255; - - dest[c] = (uint8_t) a; - } - - dest += stride; - pred += pitch; - } - + add_constant_residual(out, pred, pitch, dest, stride, 16, 16); } else if (eobs <= 10) { input[0]= input[0] * dq[0]; input[1] = input[1] * dq[1]; @@ -475,7 +395,7 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred, input[32] = input[33] = 0; input[48] = 0; - recon(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); } else { input[0]= input[0] * dq[0]; @@ -488,6 +408,6 @@ void vp9_dequant_idct_add_16x16_c(int16_t *input, int16_t *dq, uint8_t *pred, vpx_memset(input, 0, 512); - recon(diff_ptr, pred, pitch, dest, stride, 16, 16); + add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16); } } diff --git a/vp9/decoder/idct_blk.c b/vp9/decoder/idct_blk.c index cbf96e0da..a0fcd6d61 100644 --- a/vp9/decoder/idct_blk.c +++ b/vp9/decoder/idct_blk.c @@ -177,12 +177,21 @@ void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, short *dq, int stride, unsigned short *eobs, short *dc, MACROBLOCKD *xd) { - vp9_dequant_dc_idct_add_8x8_c(q, dq, pre, dst, 16, stride, dc[0]); - vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, dc[1]); - vp9_dequant_dc_idct_add_8x8_c(&q[128], dq, pre + 8 * 16, - dst + 8 * stride, 16, stride, dc[4]); - vp9_dequant_dc_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8, - dst + 8 * stride + 8, 16, stride, dc[8]); + q[0] = dc[0]; + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]); + + q[64] = dc[1]; + vp9_dequant_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, 1, + xd->eobs[4]); + + q[128] = dc[4]; + vp9_dequant_idct_add_8x8_c(&q[128], dq, pre + 8 * 16, + dst + 8 * stride, 16, stride, 1, xd->eobs[8]); + + q[192] = dc[8]; + vp9_dequant_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8, + dst + 8 * stride + 8, 16, stride, 1, + xd->eobs[12]); } #if CONFIG_SUPERBLOCKS @@ -191,13 +200,22 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq, int stride, unsigned short *eobs, short *dc, MACROBLOCKD *xd) { - vp9_dequant_dc_idct_add_8x8_c(q, dq, dst, dst, stride, stride, dc[0]); - vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, dst + 8, - dst + 8, stride, stride, dc[1]); - vp9_dequant_dc_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, - dst + 8 * stride, stride, stride, dc[4]); - vp9_dequant_dc_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, - dst + 8 * stride + 8, stride, stride, dc[8]); + q[0] = dc[0]; + vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]); + + q[64] = dc[1]; + vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8, + dst + 8, stride, stride, 1, xd->eobs[4]); + + q[128] = dc[4]; + vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, + dst + 8 * stride, stride, stride, 1, + xd->eobs[8]); + + q[192] = dc[8]; + vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, + dst + 8 * stride + 8, stride, stride, 1, + xd->eobs[12]); } #endif @@ -209,13 +227,14 @@ void vp9_dequant_idct_add_y_block_8x8_c(short *q, short *dq, unsigned char *origdest = dst; unsigned char *origpred = pre; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]); vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8, - origdest + 8, 16, stride); + origdest + 8, 16, stride, 0, xd->eobs[4]); vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16, - origdest + 8 * stride, 16, stride); + origdest + 8 * stride, 16, stride, 0, xd->eobs[8]); vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8, - origdest + 8 * stride + 8, 16, stride); + origdest + 8 * stride + 8, 16, stride, 0, + xd->eobs[12]); } void vp9_dequant_idct_add_uv_block_8x8_c(short *q, short *dq, @@ -224,12 +243,12 @@ void vp9_dequant_idct_add_uv_block_8x8_c(short *q, short *dq, unsigned char *dstv, int stride, unsigned short *eobs, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]); q += 64; pre += 64; - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride); + vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]); } #if CONFIG_SUPERBLOCKS @@ -239,11 +258,12 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq, int stride, unsigned short *eobs, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride); + vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0, + xd->eobs[16]); - q += 64; - - vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride); + q += 64; + vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0, + xd->eobs[20]); } #endif