diff --git a/configure b/configure index 46919bd3a..04090786f 100755 --- a/configure +++ b/configure @@ -249,6 +249,7 @@ EXPERIMENT_LIST=" newcoefcontext enable_6tap abovesprefmv + intht " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 337dc14f5..c6702ae31 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -413,9 +413,9 @@ typedef struct macroblockd { } MACROBLOCKD; -#define ACTIVE_HT 110 // quantization stepsize threshold +#define ACTIVE_HT 110 // quantization stepsize threshold -#define ACTIVE_HT8 300 +#define ACTIVE_HT8 300 #define ACTIVE_HT16 300 diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index f9318191d..92367fe5a 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -120,6 +120,42 @@ static const int16_t idct_i16[256] = { 4096, -3675, 3218, -2731, 2217, -1682, 1130, -568 }; +#if CONFIG_INTHT +static const int16_t iadst_i16[256] = { + 284, 850, 1407, 1951, 2476, 2977, 3450, 3889, + 4291, 4652, 4967, 5235, 5453, 5618, 5729, 5784, + 850, 2476, 3889, 4967, 5618, 5784, 5453, 4652, + 3450, 1951, 284, -1407, -2977, -4291, -5235, -5729, + 1407, 3889, 5453, 5729, 4652, 2476, -284, -2977, + -4967, -5784, -5235, -3450, -850, 1951, 4291, 5618, + 1951, 4967, 5729, 3889, 284, -3450, -5618, -5235, + -2476, 1407, 4652, 5784, 4291, 850, -2977, -5453, + 2476, 5618, 4652, 284, -4291, -5729, -2977, 1951, + 5453, 4967, 850, -3889, -5784, -3450, 1407, 5235, + 2977, 5784, 2476, -3450, -5729, -1951, 3889, 5618, + 1407, -4291, -5453, -850, 4652, 5235, 284, -4967, + 3450, 5453, -284, -5618, -2977, 3889, 5235, -850, + -5729, -2476, 4291, 4967, -1407, -5784, -1951, 4652, + 3889, 4652, -2977, -5235, 1951, 5618, -850, -5784, + -284, 5729, 1407, -5453, -2476, 4967, 3450, -4291, + 4291, 3450, -4967, -2476, 5453, 1407, -5729, -284, + 5784, -850, -5618, 1951, 5235, -2977, -4652, 3889, + 4652, 1951, -5784, 1407, 4967, -4291, -2476, 5729, + -850, -5235, 3889, 2977, -5618, 284, 5453, 
-3450, + 4967, 284, -5235, 4652, 850, -5453, 4291, 1407, + -5618, 3889, 1951, -5729, 3450, 2476, -5784, 2977, + 5235, -1407, -3450, 5784, -3889, -850, 4967, -5453, + 1951, 2977, -5729, 4291, 284, -4652, 5618, -2476, + 5453, -2977, -850, 4291, -5784, 4652, -1407, -2476, + 5235, -5618, 3450, 284, -3889, 5729, -4967, 1951, + 5618, -4291, 1951, 850, -3450, 5235, -5784, 4967, + -2977, 284, 2476, -4652, 5729, -5453, 3889, -1407, + 5729, -5235, 4291, -2977, 1407, 284, -1951, 3450, + -4652, 5453, -5784, 5618, -4967, 3889, -2476, 850, + 5784, -5729, 5618, -5453, 5235, -4967, 4652, -4291, + 3889, -3450, 2977, -2476, 1951, -1407, 850, -284 +}; +#else static const int16_t iadst_i16[256] = { 542, 1607, 2614, 3526, 4311, 4940, 5390, 5646, 5698, 5543, 5189, 4646, 3936, 3084, 2120, 1080, @@ -154,7 +190,7 @@ static const int16_t iadst_i16[256] = { 5698, -5646, 5543, -5390, 5189, -4940, 4646, -4311, 3936, -3526, 3084, -2614, 2120, -1607, 1080, -542 }; - +#endif /* Converted the transforms to integer form. 
*/ #define HORIZONTAL_SHIFT 14 // 16
@@ -657,6 +693,151 @@ void vp9_short_idct8x8_c(int16_t *input, int16_t *output, int pitch) {
   }
 }
 
+#if CONFIG_INTHT
+// 1-D 8-point inverse ADST implemented as a three-stage integer butterfly.
+// Reads input[0..7] and writes output[0..7]; an all-zero input is
+// short-circuited to an all-zero output.
+static void iadst8_1d(int16_t *input, int16_t *output) {
+  int x0, x1, x2, x3, x4, x5, x6, x7;
+  int s0, s1, s2, s3, s4, s5, s6, s7;
+
+  x0 = input[7];
+  x1 = input[0];
+  x2 = input[5];
+  x3 = input[2];
+  x4 = input[3];
+  x5 = input[4];
+  x6 = input[1];
+  x7 = input[6];
+
+  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
+    output[0] = output[1] = output[2] = output[3] = output[4]
+              = output[5] = output[6] = output[7] = 0;
+    return;
+  }
+
+  // stage 1
+  s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+  s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+  s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+  s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+  x0 = dct_const_round_shift(s0 + s4);
+  x1 = dct_const_round_shift(s1 + s5);
+  x2 = dct_const_round_shift(s2 + s6);
+  x3 = dct_const_round_shift(s3 + s7);
+  x4 = dct_const_round_shift(s0 - s4);
+  x5 = dct_const_round_shift(s1 - s5);
+  x6 = dct_const_round_shift(s2 - s6);
+  x7 = dct_const_round_shift(s3 - s7);
+
+  // stage 2
+  s0 = x0;
+  s1 = x1;
+  s2 = x2;
+  s3 = x3;
+  s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+  s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+  s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
+  s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+  x0 = s0 + s2;
+  x1 = s1 + s3;
+  x2 = s0 - s2;
+  x3 = s1 - s3;
+  x4 = dct_const_round_shift(s4 + s6);
+  x5 = dct_const_round_shift(s5 + s7);
+  x6 = dct_const_round_shift(s4 - s6);
+  x7 = dct_const_round_shift(s5 - s7);
+
+  // stage 3
+  s2 = cospi_16_64 * (x2 + x3);
+  s3 = cospi_16_64 * (x2 - x3);
+  s6 = cospi_16_64 * (x6 + x7);
+  s7 = cospi_16_64 * (x6 - x7);
+
+  x2 = dct_const_round_shift(s2);
+  x3 = dct_const_round_shift(s3);
+  x6 = dct_const_round_shift(s6);
+  x7 = dct_const_round_shift(s7);
+
+  output[0] = x0;
+  output[1] = - x4;
+  output[2] = x6;
+  output[3] = - x2;
+  output[4] = x3;
+  output[5] = - x7;
+  output[6] = x5;
+  output[7] = - x1;
+
+  return;
+}
+
+// 8x8 inverse hybrid transform. Runs the 1-D inverse transform selected by
+// tx_type (DCT or ADST per direction) over the 8 rows of input, then over the
+// 8 columns of the intermediate result, and stores each value in output
+// after rounding: (v + 16) >> 5. pitch is halved to obtain the output row
+// stride in int16_t elements (presumably it is given in bytes - confirm).
+// An unrecognized tx_type asserts in debug builds; in NDEBUG builds the
+// function returns without touching output.
+void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
+                        TX_TYPE tx_type, int pitch) {
+  int16_t out[8 * 8];
+  int16_t *outptr = &out[0];
+  const int short_pitch = pitch >> 1;
+  int i, j;
+  int16_t temp_in[8], temp_out[8];
+
+  void (*invr)(int16_t*, int16_t*);
+  void (*invc)(int16_t*, int16_t*);
+
+  switch (tx_type) {
+    case ADST_ADST:
+      invc = &iadst8_1d;
+      invr = &iadst8_1d;
+      break;
+    case ADST_DCT:
+      invc = &iadst8_1d;
+      invr = &idct8_1d;
+      break;
+    case DCT_ADST:
+      invc = &idct8_1d;
+      invr = &iadst8_1d;
+      break;
+    case DCT_DCT:
+      invc = &idct8_1d;
+      invr = &idct8_1d;
+      break;
+    default:
+      // Unknown tx_type: bail out rather than call through the
+      // uninitialized invr/invc pointers when assert() is compiled out.
+      assert(0);
+      return;
+  }
+
+  // inverse transform row vectors
+  for (i = 0; i < 8; ++i) {
+    invr(input, outptr);
+    input += 8;
+    outptr += 8;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 8; ++i) {
+    for (j = 0; j < 8; ++j)
+      temp_in[j] = out[j * 8 + i];
+    invc(temp_in, temp_out);
+    for (j = 0; j < 8; ++j)
+      output[j * short_pitch + i] = (temp_out[j] + 16) >> 5;
+  }
+}
+#endif
+
+
 void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) {
   int16_t out[8 * 8];
   int16_t *outptr = &out[0];
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index b5e6e3cc2..c81fe2d0d 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -91,8 +91,13 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
   for (i = 0; i < 9; i += 8) {
     TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
     if (tx_type != DCT_DCT) {
+#if CONFIG_INTHT
+      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,
+                       tx_type, 32);
+#else
       vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
                  xd->block[i].eob);
+#endif
     } else {
       vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                   &blockd[i].diff[0], 32);
@@ -101,8 +106,13 @@ void 
vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { for (i = 2; i < 11; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]); if (tx_type != DCT_DCT) { +#if CONFIG_INTHT + vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff, + tx_type, 32); +#else vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8, xd->block[i + 2].eob); +#endif } else { vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], &blockd[i].diff[0], 32); diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 5339aaa5f..5e4d485b5 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -411,6 +411,11 @@ specialize vp9_short_idct32x32 prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output" specialize vp9_short_idct1_32x32 +#if CONFIG_INTHT +prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int tx_type, int pitch" +specialize vp9_short_iht8x8 +#endif + prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs" specialize vp9_ihtllm diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 18d4e59c7..839a918fb 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -92,8 +92,11 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, input[i] = dq[1] * input[i]; } +#if CONFIG_INTHT + vp9_short_iht8x8(input, output, tx_type, 16); +#else vp9_ihtllm(input, output, 16, tx_type, 8, eobs); - +#endif vpx_memset(input, 0, 128); add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8); diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index dcd19ca42..d4f5c0c07 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -104,6 +104,26 @@ static const int16_t dct_i8[64] = { 16069, -13623, 9102, -3196 }; +#if CONFIG_INTHT +static const int16_t adst_i8[64] = { + 1606, 4756, 7723, 10394, + 12665, 14449, 15678, 16305, + 4756, 12665, 16305, 14449, + 7723, -1606, -10394, -15678, + 7723, 
16305, 10394, -4756, + -15678, -12665, 1606, 14449, + 10394, 14449, -4756, -16305, + -1606, 15678, 7723, -12665, + 12665, 7723, -15678, -1606, + 16305, -4756, -14449, 10394, + 14449, -1606, -12665, 15678, + -4756, -10394, 16305, -7723, + 15678, -10394, 1606, 7723, + -14449, 16305, -12665, 4756, + 16305, -15678, 14449, -12665, + 10394, -7723, 4756, -1606 +}; +#else static const int16_t adst_i8[64] = { 2921, 5742, 8368, 10708, 12684, 14228, 15288, 15827, @@ -122,6 +142,7 @@ static const int16_t adst_i8[64] = { 5742, -10708, 14228, -15827, 15288, -12684, 8368, -2921 }; +#endif static const float dct_16[256] = { 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, @@ -229,6 +250,42 @@ static const int16_t dct_i16[256] = { 11529, -11086, 10217, -8955, 7350, -5461, 3363, -1136 }; +#if CONFIG_INTHT +static const int16_t adst_i16[256] = { + 568, 1700, 2815, 3903, 4953, 5956, 6901, 7780, + 8584, 9305, 9937, 10473, 10908, 11238, 11459, 11571, + 1700, 4953, 7780, 9937, 11238, 11571, 10908, 9305, + 6901, 3903, 568, -2815, -5956, -8584, -10473, -11459, + 2815, 7780, 10908, 11459, 9305, 4953, -568, -5956, + -9937, -11571, -10473, -6901, -1700, 3903, 8584, 11238, + 3903, 9937, 11459, 7780, 568, -6901, -11238, -10473, + -4953, 2815, 9305, 11571, 8584, 1700, -5956, -10908, + 4953, 11238, 9305, 568, -8584, -11459, -5956, 3903, + 10908, 9937, 1700, -7780, -11571, -6901, 2815, 10473, + 5956, 11571, 4953, -6901, -11459, -3903, 7780, 11238, + 2815, -8584, -10908, -1700, 9305, 10473, 568, -9937, + 6901, 10908, -568, -11238, -5956, 7780, 10473, -1700, + -11459, -4953, 8584, 9937, -2815, -11571, -3903, 9305, + 7780, 9305, -5956, -10473, 3903, 11238, -1700, -11571, + -568, 11459, 2815, -10908, -4953, 9937, 6901, -8584, + 8584, 6901, -9937, -4953, 10908, 2815, -11459, -568, + 11571, -1700, -11238, 3903, 10473, -5956, -9305, 7780, + 9305, 3903, -11571, 2815, 9937, -8584, -4953, 11459, + -1700, -10473, 7780, 5956, -11238, 568, 10908, -6901, + 9937, 568, -10473, 9305, 
1700, -10908, 8584, 2815, + -11238, 7780, 3903, -11459, 6901, 4953, -11571, 5956, + 10473, -2815, -6901, 11571, -7780, -1700, 9937, -10908, + 3903, 5956, -11459, 8584, 568, -9305, 11238, -4953, + 10908, -5956, -1700, 8584, -11571, 9305, -2815, -4953, + 10473, -11238, 6901, 568, -7780, 11459, -9937, 3903, + 11238, -8584, 3903, 1700, -6901, 10473, -11571, 9937, + -5956, 568, 4953, -9305, 11459, -10908, 7780, -2815, + 11459, -10473, 8584, -5956, 2815, 568, -3903, 6901, + -9305, 10908, -11571, 11238, -9937, 7780, -4953, 1700, + 11571, -11459, 11238, -10908, 10473, -9937, 9305, -8584, + 7780, -6901, 5956, -4953, 3903, -2815, 1700, -568 +}; +#else static const int16_t adst_i16[256] = { 1084, 2159, 3214, 4240, 5228, 6168, 7052, 7873, 8622, 9293, 9880, 10377, 10781, 11087, 11292, 11395, @@ -263,6 +320,7 @@ static const int16_t adst_i16[256] = { 2159, -4240, 6168, -7873, 9293, -10377, 11087, -11395, 11292, -10781, 9880, -8622, 7052, -5228, 3214, -1084 }; +#endif static const int xC1S7 = 16069; static const int xC2S6 = 15137; diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index eacc2cd28..fa7229714 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -152,8 +152,14 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8); x->quantize_b_8x8(x->block + idx, xd->block + idx); + +#if CONFIG_INTHT + vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, + tx_type, 32); +#else vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, tx_type, 8, xd->block[idx].eob); +#endif } else { x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x->block + idx, xd->block + idx); diff --git a/vpxenc.c b/vpxenc.c index cb2569acf..10a606330 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -2472,7 +2472,6 @@ int main(int argc, const char **argv_) { " and --passes=2\n", stream->index, global.pass); }); - /* Use the frame rate from the file only if none 
was specified * on the command-line. */