From 190c2b4591039a2acef4964581e0d24d82de0d61 Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Mon, 21 Oct 2013 15:27:35 -0700 Subject: [PATCH] Using stride (# of elements) instead of pitch (bytes) in fdct4x4. Just making fdct consistent with iht/idct/fht functions which all use stride (# of elements) as input argument. Change-Id: I0ba3c52513a5fdd194f1e7e2901092671398985b --- test/fdct4x4_test.cc | 10 +++++----- vp9/common/vp9_rtcd_defs.sh | 2 +- vp9/encoder/vp9_dct.c | 14 ++++++-------- vp9/encoder/vp9_encodemb.c | 4 ++-- vp9/encoder/vp9_onyx_if.c | 4 ++-- vp9/encoder/vp9_rdopt.c | 4 ++-- vp9/encoder/x86/vp9_dct_sse2.c | 3 +-- 7 files changed, 19 insertions(+), 22 deletions(-) diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc index edc194d63..18c12a857 100644 --- a/test/fdct4x4_test.cc +++ b/test/fdct4x4_test.cc @@ -31,15 +31,15 @@ void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, } void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int /*tx_type*/) { - vp9_idct4x4_16_add_c(out, dst, stride >> 1); + vp9_idct4x4_16_add_c(out, dst, stride); } void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/, int stride, int tx_type) { - vp9_short_fht4x4_c(in, out, stride >> 1, tx_type); + vp9_short_fht4x4_c(in, out, stride, tx_type); } void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst, int stride, int tx_type) { - vp9_iht4x4_16_add_c(out, dst, stride >> 1, tx_type); + vp9_iht4x4_16_add_c(out, dst, stride, tx_type); } class FwdTrans4x4Test : public ::testing::TestWithParam { @@ -78,7 +78,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16); DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16); - const int pitch = 8; + const int pitch = 4; int count_sign_block[16][2]; const int count_test_block = 1000000; @@ -152,7 +152,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) { for (int j = 0; j < 16; ++j) test_input_block[j] = src[j] - dst[j]; - const int pitch = 8; + const int pitch = 4; RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_); for (int j = 0; j < 16; ++j) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 6fa9e22bb..c1efcbb29 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -698,7 +698,7 @@ specialize vp9_short_fht16x16 sse2 prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int pitch" specialize vp9_short_fdct8x8 sse2 -prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch" +prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride" specialize vp9_short_fdct4x4 sse2 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride" diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 23c652d0f..bd24272ca 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -36,14 +36,13 @@ static void fdct4(const int16_t *input, int16_t *output) { output[3] = dct_const_round_shift(temp2); } -void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, // as the first pass results are transposed, we tranpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). - const int stride = pitch >> 1; int pass; // We need an intermediate buffer between passes. int16_t intermediate[4 * 4]; @@ -587,18 +586,17 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output, /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ -void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) { +void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) { int i; int a1, b1, c1, d1, e1; int16_t *ip = input; int16_t *op = output; - int pitch_short = pitch >> 1; for (i = 0; i < 4; i++) { - a1 = ip[0 * pitch_short]; - b1 = ip[1 * pitch_short]; - c1 = ip[2 * pitch_short]; - d1 = ip[3 * pitch_short]; + a1 = ip[0 * stride]; + b1 = ip[1 * stride]; + c1 = ip[2 * stride]; + d1 = ip[3 * stride]; a1 += b1; d1 = d1 - c1; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 13d8aa827..ece537c7d 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -402,7 +402,7 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, xoff = 4 * (block & twmask); yoff = 4 * (block >> twl); src_diff = p->src_diff + 4 * bw * yoff + xoff; - x->fwd_txm4x4(src_diff, coeff, bw * 8); + x->fwd_txm4x4(src_diff, coeff, bw * 4); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan, iscan); @@ -612,7 +612,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (tx_type != DCT_DCT) vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); else - x->fwd_txm4x4(src_diff, coeff, bw * 8); + x->fwd_txm4x4(src_diff, coeff, bw * 4); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan, iscan); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index f6b2a2876..a2556f4e8 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -959,9 +959,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->optimize_coefficients = 0; } - cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; + cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { - cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; + cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4; } cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9ef507698..7216967b0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1089,7 +1089,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_short_fht4x4(src_diff, coeff, 8, tx_type); x->quantize_b_4x4(x, block, tx_type, 16); } else { - x->fwd_txm4x4(src_diff, coeff, 16); + x->fwd_txm4x4(src_diff, coeff, 8); x->quantize_b_4x4(x, block, tx_type, 16); } @@ -1566,7 +1566,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, k += (idy * 2 + idx); coeff = BLOCK_OFFSET(p->coeff, k); x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), - coeff, 16); + coeff, 8); x->quantize_b_4x4(x, k, DCT_DCT, 16); thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index 457883fb8..f460c598c 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -12,14 +12,13 @@ #include "vp9/common/vp9_idct.h" // for cospi constants #include "vpx_ports/mem.h" -void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) { +void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, // as the first pass results are transposed, we tranpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). - const int stride = pitch >> 1; int pass; // Constants // When we use them, in one case, they are all the same. In all others