diff --git a/configure b/configure
index 46919bd3a..04090786f 100755
--- a/configure
+++ b/configure
@@ -249,6 +249,7 @@ EXPERIMENT_LIST="
     newcoefcontext
     enable_6tap
     abovesprefmv
+    intht
 "
 CONFIG_LIST="
     external_build
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 337dc14f5..c6702ae31 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -413,9 +413,9 @@ typedef struct macroblockd {
 
 } MACROBLOCKD;
 
-#define ACTIVE_HT 110                // quantization stepsize threshold
+#define ACTIVE_HT   110                // quantization stepsize threshold
 
-#define ACTIVE_HT8 300
+#define ACTIVE_HT8  300
 
 #define ACTIVE_HT16 300
 
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index f9318191d..92367fe5a 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -120,6 +120,42 @@ static const int16_t idct_i16[256] = {
    4096, -3675,  3218, -2731,  2217, -1682,  1130,  -568
 };
 
+#if CONFIG_INTHT
+static const int16_t iadst_i16[256] = {  // CONFIG_INTHT variant of the table
+   284,   850,  1407,  1951,  2476,  2977,  3450,  3889,
+  4291,  4652,  4967,  5235,  5453,  5618,  5729,  5784,
+   850,  2476,  3889,  4967,  5618,  5784,  5453,  4652,
+  3450,  1951,   284, -1407, -2977, -4291, -5235, -5729,
+  1407,  3889,  5453,  5729,  4652,  2476,  -284, -2977,
+ -4967, -5784, -5235, -3450,  -850,  1951,  4291,  5618,
+  1951,  4967,  5729,  3889,   284, -3450, -5618, -5235,
+ -2476,  1407,  4652,  5784,  4291,   850, -2977, -5453,
+  2476,  5618,  4652,   284, -4291, -5729, -2977,  1951,
+  5453,  4967,   850, -3889, -5784, -3450,  1407,  5235,
+  2977,  5784,  2476, -3450, -5729, -1951,  3889,  5618,
+  1407, -4291, -5453,  -850,  4652,  5235,   284, -4967,
+  3450,  5453,  -284, -5618, -2977,  3889,  5235,  -850,
+ -5729, -2476,  4291,  4967, -1407, -5784, -1951,  4652,
+  3889,  4652, -2977, -5235,  1951,  5618,  -850, -5784,
+  -284,  5729,  1407, -5453, -2476,  4967,  3450, -4291,
+  4291,  3450, -4967, -2476,  5453,  1407, -5729,  -284,
+  5784,  -850, -5618,  1951,  5235, -2977, -4652,  3889,
+  4652,  1951, -5784,  1407,  4967, -4291, -2476,  5729,
+  -850, -5235,  3889,  2977, -5618,   284,  5453, -3450,
+  4967,   284, -5235,  4652,   850, -5453,  4291,  1407,
+ -5618,  3889,  1951, -5729,  3450,  2476, -5784,  2977,
+  5235, -1407, -3450,  5784, -3889,  -850,  4967, -5453,
+  1951,  2977, -5729,  4291,   284, -4652,  5618, -2476,
+  5453, -2977,  -850,  4291, -5784,  4652, -1407, -2476,
+  5235, -5618,  3450,   284, -3889,  5729, -4967,  1951,
+  5618, -4291,  1951,   850, -3450,  5235, -5784,  4967,
+ -2977,   284,  2476, -4652,  5729, -5453,  3889, -1407,
+  5729, -5235,  4291, -2977,  1407,   284, -1951,  3450,
+ -4652,  5453, -5784,  5618, -4967,  3889, -2476,   850,
+  5784, -5729,  5618, -5453,  5235, -4967,  4652, -4291,
+  3889, -3450,  2977, -2476,  1951, -1407,   850,  -284
+};
+#else
 static const int16_t iadst_i16[256] = {
     542,  1607,  2614,  3526,  4311,  4940,  5390,  5646,
    5698,  5543,  5189,  4646,  3936,  3084,  2120,  1080,
@@ -154,7 +190,7 @@ static const int16_t iadst_i16[256] = {
    5698, -5646,  5543, -5390,  5189, -4940,  4646, -4311,
    3936, -3526,  3084, -2614,  2120, -1607,  1080,  -542
 };
-
+#endif
 
 /* Converted the transforms to integer form. */
 #define HORIZONTAL_SHIFT 14  // 16
@@ -657,6 +693,138 @@ void vp9_short_idct8x8_c(int16_t *input, int16_t *output, int pitch) {
     }
 }
 
+#if CONFIG_INTHT
+static void iadst8_1d(int16_t *input, int16_t *output) {
+  int x0, x1, x2, x3, x4, x5, x6, x7;
+  int s0, s1, s2, s3, s4, s5, s6, s7;
+  // 8-point 1-D inverse ADST in three stages; inputs are loaded permuted.
+  x0 = input[7];
+  x1 = input[0];
+  x2 = input[5];
+  x3 = input[2];
+  x4 = input[3];
+  x5 = input[4];
+  x6 = input[1];
+  x7 = input[6];
+  // All eight inputs are zero: write zeros to every output and return early.
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
+    output[0] = output[1] = output[2] = output[3] = output[4]
+                    = output[5] = output[6] = output[7] = 0;
+    return;
+  }
+
+  // stage 1: rotate the pairs (x0,x1)..(x6,x7), then combine sums/differences
+  s0 = cospi_2_64  * x0 + cospi_30_64 * x1;
+  s1 = cospi_30_64 * x0 - cospi_2_64  * x1;
+  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+  s6 = cospi_26_64 * x6 + cospi_6_64  * x7;
+  s7 = cospi_6_64  * x6 - cospi_26_64 * x7;
+
+  x0 = dct_const_round_shift(s0 + s4);
+  x1 = dct_const_round_shift(s1 + s5);
+  x2 = dct_const_round_shift(s2 + s6);
+  x3 = dct_const_round_shift(s3 + s7);
+  x4 = dct_const_round_shift(s0 - s4);
+  x5 = dct_const_round_shift(s1 - s5);
+  x6 = dct_const_round_shift(s2 - s6);
+  x7 = dct_const_round_shift(s3 - s7);
+
+  // stage 2: x0..x3 go straight to add/subtract; x4..x7 are rotated first
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = cospi_8_64  * x4 + cospi_24_64 * x5;
+  s5 = cospi_24_64 * x4 - cospi_8_64  * x5;
+  s6 = - cospi_24_64 * x6 + cospi_8_64  * x7;
+  s7 =   cospi_8_64  * x6 + cospi_24_64 * x7;
+
+  x0 = s0 + s2;
+  x1 = s1 + s3;
+  x2 = s0 - s2;
+  x3 = s1 - s3;
+  x4 = dct_const_round_shift(s4 + s6);
+  x5 = dct_const_round_shift(s5 + s7);
+  x6 = dct_const_round_shift(s4 - s6);
+  x7 = dct_const_round_shift(s5 - s7);
+
+  // stage 3: scale (x2 +/- x3) and (x6 +/- x7) by cospi_16_64; rest unchanged
+  s2 = cospi_16_64 * (x2 + x3);
+  s3 = cospi_16_64 * (x2 - x3);
+  s6 = cospi_16_64 * (x6 + x7);
+  s7 = cospi_16_64 * (x6 - x7);
+
+  x2 = dct_const_round_shift(s2);
+  x3 = dct_const_round_shift(s3);
+  x6 = dct_const_round_shift(s6);
+  x7 = dct_const_round_shift(s7);
+  // Results are stored permuted; every odd-indexed output is negated.
+  output[0] =   x0;
+  output[1] = - x4;
+  output[2] =   x6;
+  output[3] = - x2;
+  output[4] =   x3;
+  output[5] = - x7;
+  output[6] =   x5;
+  output[7] = - x1;
+
+  return;
+}
+
+void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
+                        TX_TYPE tx_type, int pitch) {
+  // 8x8 inverse hybrid transform: all rows first, then all columns.
+  int16_t out[8 * 8];
+  int16_t *outptr = &out[0];
+  const int short_pitch = pitch >> 1;  // output row stride in int16_t units
+  int i, j;
+  int16_t temp_in[8], temp_out[8];
+  // DCT by default, so an unexpected tx_type never leaves a pointer unset.
+  void (*invr)(int16_t*, int16_t*) = &idct8_1d;
+  void (*invc)(int16_t*, int16_t*) = &idct8_1d;
+
+  switch (tx_type) {
+    case ADST_ADST:
+      invc = &iadst8_1d;
+      invr = &iadst8_1d;
+      break;
+    case ADST_DCT:
+      invc = &iadst8_1d;
+      invr = &idct8_1d;
+      break;
+    case DCT_ADST:
+      invc = &idct8_1d;
+      invr = &iadst8_1d;
+      break;
+    case DCT_DCT:
+      invc = &idct8_1d;
+      invr = &idct8_1d;
+      break;
+    default:
+      assert(0);  // with NDEBUG the DCT/DCT defaults above are used
+  }
+  // inverse transform row vectors (8 consecutive coefficients per row)
+  for (i = 0; i < 8; ++i) {
+    invr(input, outptr);
+    input += 8;
+    outptr += 8;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j)
+      temp_in[j] = out[j * 8 + i];
+    invc(temp_in, temp_out);
+    for (j = 0; j < 8; ++j)
+      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;  // round, >> 5
+  }
+}
+#endif
+
+
 void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) {
   int16_t out[8 * 8];
   int16_t *outptr = &out[0];
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index b5e6e3cc2..c81fe2d0d 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -91,8 +91,13 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
   for (i = 0; i < 9; i += 8) {
     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
     if (tx_type != DCT_DCT) {
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,
+                           tx_type, 32);
+#else
       vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
                  xd->block[i].eob);
+#endif
     } else {
       vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                   &blockd[i].diff[0], 32);
@@ -101,8 +106,13 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
   for (i = 2; i < 11; i += 8) {
     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
     if (tx_type != DCT_DCT) {
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
+                           tx_type, 32);
+#else
       vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
                  xd->block[i + 2].eob);
+#endif
     } else {
       vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
                                   &blockd[i].diff[0], 32);
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 5339aaa5f..5e4d485b5 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -411,6 +411,11 @@ specialize vp9_short_idct32x32
 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
 specialize vp9_short_idct1_32x32
 
+if [ "$CONFIG_INTHT" = "yes" ]; then  # '#if' is only a comment in shell
+prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int tx_type, int pitch"
+specialize vp9_short_iht8x8
+fi  # CONFIG_INTHT
+
 prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"
 specialize vp9_ihtllm
 
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index 18d4e59c7..839a918fb 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -92,8 +92,11 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
       input[i] = dq[1] * input[i];
     }
 
+#if CONFIG_INTHT
+    vp9_short_iht8x8(input, output, tx_type, 16);
+#else
     vp9_ihtllm(input, output, 16, tx_type, 8, eobs);
-
+#endif
     vpx_memset(input, 0, 128);
 
     add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index dcd19ca42..d4f5c0c07 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -104,6 +104,26 @@ static const int16_t dct_i8[64] = {
    16069, -13623,   9102,  -3196
 };
 
+#if CONFIG_INTHT
+static const int16_t adst_i8[64] = {  // CONFIG_INTHT variant of the table
+   1606,    4756,     7723,    10394,
+  12665,   14449,    15678,    16305,
+   4756,   12665,    16305,    14449,
+   7723,   -1606,   -10394,   -15678,
+   7723,   16305,    10394,    -4756,
+ -15678,  -12665,     1606,    14449,
+  10394,   14449,    -4756,   -16305,
+  -1606,   15678,     7723,   -12665,
+  12665,    7723,   -15678,    -1606,
+  16305,   -4756,   -14449,    10394,
+  14449,   -1606,   -12665,    15678,
+  -4756,  -10394,    16305,    -7723,
+  15678,  -10394,     1606,     7723,
+ -14449,   16305,   -12665,     4756,
+  16305,  -15678,    14449,   -12665,
+  10394,   -7723,     4756,    -1606
+};
+#else
 static const int16_t adst_i8[64] = {
     2921,   5742,   8368,  10708,
    12684,  14228,  15288,  15827,
@@ -122,6 +142,7 @@ static const int16_t adst_i8[64] = {
     5742, -10708,  14228, -15827,
    15288, -12684,   8368,  -2921
 };
+#endif
 
 static const float dct_16[256] = {
   0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,  0.250000,
@@ -229,6 +250,42 @@ static const int16_t dct_i16[256] = {
    11529, -11086,  10217,  -8955,   7350,  -5461,   3363,  -1136
 };
 
+#if CONFIG_INTHT
+static const int16_t adst_i16[256] = {  // CONFIG_INTHT variant of the table
+     568,    1700,    2815,    3903,    4953,    5956,    6901,    7780,
+    8584,    9305,    9937,   10473,   10908,   11238,   11459,   11571,
+    1700,    4953,    7780,    9937,   11238,   11571,   10908,    9305,
+    6901,    3903,     568,   -2815,   -5956,   -8584,  -10473,  -11459,
+    2815,    7780,   10908,   11459,    9305,    4953,    -568,   -5956,
+   -9937,  -11571,  -10473,   -6901,   -1700,    3903,    8584,   11238,
+    3903,    9937,   11459,    7780,     568,   -6901,  -11238,  -10473,
+   -4953,    2815,    9305,   11571,    8584,    1700,   -5956,  -10908,
+    4953,   11238,    9305,     568,   -8584,  -11459,   -5956,    3903,
+   10908,    9937,    1700,   -7780,  -11571,   -6901,    2815,   10473,
+    5956,   11571,    4953,   -6901,  -11459,   -3903,    7780,   11238,
+    2815,   -8584,  -10908,   -1700,    9305,   10473,     568,   -9937,
+    6901,   10908,    -568,  -11238,   -5956,    7780,   10473,   -1700,
+  -11459,   -4953,    8584,    9937,   -2815,  -11571,   -3903,    9305,
+    7780,    9305,   -5956,  -10473,    3903,   11238,   -1700,  -11571,
+    -568,   11459,    2815,  -10908,   -4953,    9937,    6901,   -8584,
+    8584,    6901,   -9937,   -4953,   10908,    2815,  -11459,    -568,
+   11571,   -1700,  -11238,    3903,   10473,   -5956,   -9305,    7780,
+    9305,    3903,  -11571,    2815,    9937,   -8584,   -4953,   11459,
+   -1700,  -10473,    7780,    5956,  -11238,     568,   10908,   -6901,
+    9937,     568,  -10473,    9305,    1700,  -10908,    8584,    2815,
+  -11238,    7780,    3903,  -11459,    6901,    4953,  -11571,    5956,
+   10473,   -2815,   -6901,   11571,   -7780,   -1700,    9937,  -10908,
+    3903,    5956,  -11459,    8584,     568,   -9305,   11238,   -4953,
+   10908,   -5956,   -1700,    8584,  -11571,    9305,   -2815,   -4953,
+   10473,  -11238,    6901,     568,   -7780,   11459,   -9937,    3903,
+   11238,   -8584,    3903,    1700,   -6901,   10473,  -11571,    9937,
+   -5956,     568,    4953,   -9305,   11459,  -10908,    7780,   -2815,
+   11459,  -10473,    8584,   -5956,    2815,     568,   -3903,    6901,
+   -9305,   10908,  -11571,   11238,   -9937,    7780,   -4953,    1700,
+   11571,  -11459,   11238,  -10908,   10473,   -9937,    9305,   -8584,
+    7780,   -6901,    5956,   -4953,    3903,   -2815,    1700,    -568
+};
+#else
 static const int16_t adst_i16[256] = {
     1084,   2159,   3214,   4240,   5228,   6168,   7052,   7873,
     8622,   9293,   9880,  10377,  10781,  11087,  11292,  11395,
@@ -263,6 +320,7 @@ static const int16_t adst_i16[256] = {
     2159,  -4240,   6168,  -7873,   9293, -10377,  11087, -11395,
    11292, -10781,   9880,  -8622,   7052,  -5228,   3214,  -1084
 };
+#endif
 
 static const int xC1S7 = 16069;
 static const int xC2S6 = 15137;
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index eacc2cd28..fa7229714 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -152,8 +152,14 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
       vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,
                 tx_type, 8);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
+
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
+                            tx_type, 32);
+#else
       vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
                    tx_type, 8, xd->block[idx].eob);
+#endif
     } else {
       x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
diff --git a/vpxenc.c b/vpxenc.c
index cb2569acf..10a606330 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -2472,7 +2472,6 @@ int main(int argc, const char **argv_) {
         " and --passes=2\n", stream->index, global.pass);
     });
 
-
     /* Use the frame rate from the file only if none was specified
      * on the command-line.
      */