Using stride (# of elements) instead of pitch (bytes) in fdct16x16.

This makes fdct16x16 consistent with the iht/idct/fht functions, which all take a stride (# of elements) rather than a pitch (bytes) as their input argument. Change-Id: I2d95fdcbba96aaa0ed24a80870cb38f53487a97d
2013-10-18 11:49:33 -07:00 · 2013-10-18 11:49:33 -07:00 · 1aa7fd5aef
commit 1aa7fd5aef
parent eaf2d4cbfc
5 changed files with 8 additions and 11 deletions
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@ -395,8 +395,7 @@ class Trans16x16TestBase {
      for (int j = 0; j < kNumCoeffs; ++j)
        coeff[j] = round(out_r[j]);

-      const int pitch = 32;
-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
+      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));

      for (int j = 0; j < kNumCoeffs; ++j) {
        const uint32_t diff = dst[j] - src[j];
@ -421,7 +420,7 @@ class Trans16x16DCT : public Trans16x16TestBase,
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_  = GET_PARAM(2);
-    pitch_    = 32;
+    pitch_    = 16;
    fwd_txfm_ref = fdct16x16_ref;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
@ -431,7 +430,7 @@ class Trans16x16DCT : public Trans16x16TestBase,
    fwd_txfm_(in, out, stride);
  }
  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride >> 1);
+    inv_txfm_(out, dst, stride);
  }

  fdct_t fwd_txfm_;
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@ -707,7 +707,7 @@ specialize vp9_short_fdct32x32 sse2
 prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int stride"
 specialize vp9_short_fdct32x32_rd sse2

-prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int stride"
 specialize vp9_short_fdct16x16 sse2

 prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@ -302,14 +302,13 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {
  }
 }

-void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int stride) {
  // The 2D transform is done with two passes which are actually pretty
  // similar. In the first one, we transform the columns and transpose
  // the results. In the second one, we transform the rows. To achieve that,
  // as the first pass results are transposed, we tranpose the columns (that
  // is the transposed rows) and transpose the results (so that it goes back
  // in normal/row positions).
-  const int stride = pitch >> 1;
  int pass;
  // We need an intermediate buffer between passes.
  int16_t intermediate[256];
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@ -379,7 +379,7 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
      xoff = 16 * (block & twmask);
      yoff = 16 * (block >> twl);
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
-      vp9_short_fdct16x16(src_diff, coeff, bw * 8);
+      vp9_short_fdct16x16(src_diff, coeff, bw * 4);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
@ -559,7 +559,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
      if (tx_type != DCT_DCT)
        vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);
      else
-        vp9_short_fdct16x16(src_diff, coeff, bw * 8);
+        vp9_short_fdct16x16(src_diff, coeff, bw * 4);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@ -1056,14 +1056,13 @@ void vp9_short_fht8x8_sse2(int16_t *input, int16_t *output,
  write_buffer_8x8(output, in, 8);
 }

-void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int stride) {
  // The 2D transform is done with two passes which are actually pretty
  // similar. In the first one, we transform the columns and transpose
  // the results. In the second one, we transform the rows. To achieve that,
  // as the first pass results are transposed, we tranpose the columns (that
  // is the transposed rows) and transpose the results (so that it goes back
  // in normal/row positions).
-  const int stride = pitch >> 1;
  int pass;
  // We need an intermediate buffer between passes.
  DECLARE_ALIGNED_ARRAY(16, int16_t, intermediate, 256);