Merge "Changing pitch value meaning for fht and iht transforms." into experimental

2013-02-26 10:44:15 -08:00 · 2013-02-26 10:44:15 -08:00 · 998bed1d2c
commit 998bed1d2c
parent c4ae97911a 9bf3f75168
7 changed files with 35 additions and 43 deletions
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -122,7 +122,7 @@ void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,
  }
 }

-void idct4_1d(int16_t *input, int16_t *output) {
+static void idct4_1d(int16_t *input, int16_t *output) {
  int16_t step[4];
  int temp1, temp2;
  // stage 1
@@ -200,7 +200,7 @@ void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr,
  }
 }

-void idct8_1d(int16_t *input, int16_t *output) {
+static void idct8_1d(int16_t *input, int16_t *output) {
  int16_t step1[8], step2[8];
  int temp1, temp2;
  // stage 1
@@ -320,10 +320,9 @@ static const transform_2d IHT_4[] = {

 void vp9_short_iht4x4_c(int16_t *input, int16_t *output,
                        int pitch, TX_TYPE tx_type) {
+  int i, j;
  int16_t out[4 * 4];
  int16_t *outptr = out;
-  const int half_pitch = pitch >> 1;
-  int i, j;
  int16_t temp_in[4], temp_out[4];
  const transform_2d ht = IHT_4[tx_type];

@@ -340,7 +339,7 @@ void vp9_short_iht4x4_c(int16_t *input, int16_t *output,
      temp_in[j] = out[j * 4 + i];
    ht.cols(temp_in, temp_out);
    for (j = 0; j < 4; ++j)
-      output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
+      output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
  }
 }

@@ -430,10 +429,9 @@ static const transform_2d IHT_8[] = {

 void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
                        int pitch, TX_TYPE tx_type) {
+  int i, j;
  int16_t out[8 * 8];
  int16_t *outptr = out;
-  const int half_pitch = pitch >> 1;
-  int i, j;
  int16_t temp_in[8], temp_out[8];
  const transform_2d ht = IHT_8[tx_type];

@@ -450,7 +448,7 @@ void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
      temp_in[j] = out[j * 8 + i];
    ht.cols(temp_in, temp_out);
    for (j = 0; j < 8; ++j)
-      output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
+      output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
  }
 }

@@ -486,7 +484,7 @@ void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) {
  output[0] = ROUND_POWER_OF_TWO(out, 5);
 }

-void idct16_1d(int16_t *input, int16_t *output) {
+static void idct16_1d(int16_t *input, int16_t *output) {
  int16_t step1[16], step2[16];
  int temp1, temp2;

@@ -853,18 +851,17 @@ static const transform_2d IHT_16[] = {
 };

 void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
-                          int pitch, TX_TYPE tx_type) {
+                          int input_pitch, TX_TYPE tx_type) {
+  int i, j;
  int16_t out[16 * 16];
  int16_t *outptr = out;
-  const int half_pitch = pitch >> 1;
-  int i, j;
  int16_t temp_in[16], temp_out[16];
  const transform_2d ht = IHT_16[tx_type];

  // Rows
  for (i = 0; i < 16; ++i) {
    ht.rows(input, outptr);
-    input += half_pitch;
+    input += input_pitch;
    outptr += 16;
  }

@@ -912,7 +909,7 @@ void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
  output[0] = ROUND_POWER_OF_TWO(out, 6);
 }

-void idct32_1d(int16_t *input, int16_t *output) {
+static void idct32_1d(int16_t *input, int16_t *output) {
  int16_t step1[32], step2[32];
  int temp1, temp2;

--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -25,8 +25,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
  for (i = 0; i < 16; i++) {
    TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
-      vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff,
-                       32, tx_type);
+      vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
    } else {
      vp9_inverse_transform_b_4x4(xd, i, 32);
    }
@@ -58,8 +57,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
  for (i = 0; i < 9; i += 8) {
    TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
-      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,
-                           32, tx_type);
+      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
    } else {
      vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                  &blockd[i].diff[0], 32);
@@ -69,7 +67,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
    TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
-                           32, tx_type);
+                           16, tx_type);
    } else {
      vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
                                  &blockd[i].diff[0], 32);
@@ -101,7 +99,7 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
  BLOCKD *bd = &xd->block[0];
  TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
  if (tx_type != DCT_DCT) {
-    vp9_short_iht16x16(bd->dqcoeff, bd->diff, 32, tx_type);
+    vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);
  } else {
    vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
                                  &xd->block[0].diff[0], 32);
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -65,7 +65,7 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
  for (i = 0; i < 16; i++)
    input[i] = dq[i] * input[i];

-  vp9_short_iht4x4(input, output, 8, tx_type);
+  vp9_short_iht4x4(input, output, 4, tx_type);
  vpx_memset(input, 0, 32);

  add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
@@ -86,7 +86,7 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
    for (i = 1; i < 64; i++)
      input[i] *= dq[1];

-    vp9_short_iht8x8(input, output, 16, tx_type);
+    vp9_short_iht8x8(input, output, 8, tx_type);
    vpx_memset(input, 0, 128);

    add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
@@ -247,7 +247,7 @@ void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
      input[i] *= dq[1];

    // inverse hybrid transform
-    vp9_short_iht16x16(input, output, 32, tx_type);
+    vp9_short_iht16x16(input, output, 16, tx_type);

    // the idct halves ( >> 1) the pitch
    // vp9_short_idct16x16_c(input, output, 32);
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -105,7 +105,6 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
                        int pitch, TX_TYPE tx_type) {
  int16_t out[4 * 4];
  int16_t *outptr = &out[0];
-  const int short_pitch = pitch >> 1;
  int i, j;
  int16_t temp_in[4], temp_out[4];

@@ -137,7 +136,7 @@ void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
  // column transform
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j)
-      temp_in[j] = input[j * short_pitch + i] << 4;
+      temp_in[j] = input[j * pitch + i] << 4;
    if (i == 0 && temp_in[0])
      temp_in[0] += 1;
    fwdc(temp_in, temp_out);
@@ -308,7 +307,6 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
                        int pitch, TX_TYPE tx_type) {
  int16_t out[64];
  int16_t *outptr = &out[0];
-  const int short_pitch = pitch >> 1;
  int i, j;
  int16_t temp_in[8], temp_out[8];

@@ -339,7 +337,7 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
  // column transform
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j)
-      temp_in[j] = input[j * short_pitch + i] << 2;
+      temp_in[j] = input[j * pitch + i] << 2;
    fwdc(temp_in, temp_out);
    for (j = 0; j < 8; ++j)
      outptr[j * 8 + i] = temp_out[j];
@@ -697,7 +695,6 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
                          int pitch, TX_TYPE tx_type) {
  int16_t out[256];
  int16_t *outptr = &out[0];
-  const int short_pitch = pitch >> 1;
  int i, j;
  int16_t temp_in[16], temp_out[16];

@@ -728,7 +725,7 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
  // column transform
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j)
-      temp_in[j] = input[j * short_pitch + i] << 2;
+      temp_in[j] = input[j * pitch + i] << 2;
    fwdc(temp_in, temp_out);
    for (j = 0; j < 16; ++j)
      outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -54,9 +54,9 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {

  tx_type = get_tx_type_4x4(&x->e_mbd, b);
  if (tx_type != DCT_DCT) {
-    vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);
+    vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
    vp9_ht_quantize_b_4x4(be, b, tx_type);
-    vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type);
+    vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
  } else {
    x->fwd_txm4x4(be->src_diff, be->coeff, 32);
    x->quantize_b_4x4(be, b) ;
@@ -149,10 +149,10 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {

    tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
    if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type);
+      vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
      x->quantize_b_8x8(x->block + idx, xd->block + idx);
      vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
-                            32, tx_type);
+                            16, tx_type);
    } else {
      x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
      x->quantize_b_8x8(x->block + idx, xd->block + idx);
@@ -164,9 +164,9 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
      be = &x->block[ib + iblock[i]];
      tx_type = get_tx_type_4x4(xd, b);
      if (tx_type != DCT_DCT) {
-        vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);
+        vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
        vp9_ht_quantize_b_4x4(be, b, tx_type);
-        vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type);
+        vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
      } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
        x->fwd_txm8x4(be->src_diff, be->coeff, 32);
        x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -174,7 +174,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
    BLOCK *b = &x->block[i];
    TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
-      vp9_short_fht4x4(b->src_diff, b->coeff, 32, tx_type);
+      vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type);
    } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
      x->fwd_txm8x4(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
@@ -209,7 +209,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(b->src_diff, b->coeff, 32, tx_type);
+      vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type);
    } else {
      x->fwd_txm8x8(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
@@ -219,7 +219,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 32, tx_type);
+      vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type);
    } else {
      x->fwd_txm8x8(&x->block[i].src_diff[0],
                           &x->block[i + 2].coeff[0], 32);
@@ -247,7 +247,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) {
  TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
  vp9_clear_system_state();
  if (tx_type != DCT_DCT) {
-    vp9_short_fht16x16(b->src_diff, b->coeff, 32, tx_type);
+    vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type);
  } else {
    x->fwd_txm16x16(&x->block[0].src_diff[0],
                           &x->block[0].coeff[0], 32);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1054,7 +1054,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
    b->bmi.as_mode.first = mode;
    tx_type = get_tx_type_4x4(xd, b);
    if (tx_type != DCT_DCT) {
-      vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);
+      vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
      vp9_ht_quantize_b_4x4(be, b, tx_type);
    } else {
      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
@@ -1087,7 +1087,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,

  // inverse transform
  if (best_tx_type != DCT_DCT)
-    vp9_short_iht4x4(best_dqcoeff, b->diff, 32, best_tx_type);
+    vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type);
  else
    xd->inv_txm4x4(best_dqcoeff, b->diff, 32);

@@ -1351,7 +1351,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
    if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
      TX_TYPE tx_type = get_tx_type_8x8(xd, b);
      if (tx_type != DCT_DCT)
-        vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 32, tx_type);
+        vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
      else
        x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
      x->quantize_b_8x8(x->block + idx, xd->block + idx);
@@ -1390,7 +1390,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
        be = &x->block[ib + iblock[i]];
        tx_type = get_tx_type_4x4(xd, b);
        if (tx_type != DCT_DCT) {
-          vp9_short_fht4x4(be->src_diff, be->coeff, 32, tx_type);
+          vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
          vp9_ht_quantize_b_4x4(be, b, tx_type);
        } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
          x->fwd_txm8x4(be->src_diff, be->coeff, 32);