From 5cfd82bcaf666c1b9cacd8a4899fc703598aa5b0 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 6 Feb 2013 14:13:05 -0800 Subject: [PATCH] Use fdct8x4 instead of fdct4x4 where the block size allows it. This allows for faster SIMD implementations in the future (currently there is no speed impact). Change-Id: I732647e9148b5dcb44e6bc8728138f0141218329 --- vp9/encoder/vp9_encodeintra.c | 6 ++++++ vp9/encoder/vp9_encodemb.c | 4 ++++ vp9/encoder/vp9_rdopt.c | 14 +++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 09ea045d7..eacc2cd28 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -168,6 +168,12 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob); + } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { + x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4_pair(be, be + 1, b, b + 1); + vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); + vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32); + i++; } else { x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 45278a71b..52eabf129 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -187,6 +187,10 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) { if (tx_type != DCT_DCT) { assert(has_2nd_order == 0); vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4); + } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) { + x->vp9_short_fdct8x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + i++; } else { x->vp9_short_fdct4x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2868db55f..5324db530 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1459,21 +1459,33 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, distortion = 0; rate_t = 0; for (i = 0; i < 4; ++i) { + int do_two = 0; b = &xd->block[ib + iblock[i]]; be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, b); if (tx_type != DCT_DCT) { vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4); vp9_ht_quantize_b_4x4(be, b, tx_type); + } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { + x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32); + x->quantize_b_4x4_pair(be, be + 1, b, b + 1); + do_two = 1; } else { x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(be, b); } - distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16); + distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, &ta0, &tl0, TX_4X4); + if (do_two) { + rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC, + // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0, + &ta0, &tl0, + TX_4X4); + i++; + } } b = &xd->block[ib]; be = &x->block[ib];