Merge "butterfly inverse 4x4 ADST" into experimental
This commit is contained in:
commit
f1060e4cd8
1
configure
vendored
1
configure
vendored
@ -250,6 +250,7 @@ EXPERIMENT_LIST="
|
||||
enable_6tap
|
||||
abovesprefmv
|
||||
intht
|
||||
intht4x4
|
||||
"
|
||||
CONFIG_LIST="
|
||||
external_build
|
||||
|
@ -408,7 +408,7 @@ typedef struct macroblockd {
|
||||
|
||||
#define ACTIVE_HT8 300
|
||||
|
||||
#define ACTIVE_HT16 300
|
||||
#define ACTIVE_HT16 0
|
||||
|
||||
// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
|
||||
static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
|
||||
|
@ -50,6 +50,14 @@ static const int cospi_29_64 = 2404;
|
||||
static const int cospi_30_64 = 1606;
|
||||
static const int cospi_31_64 = 804;
|
||||
|
||||
#if CONFIG_INTHT4X4
|
||||
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
|
||||
static const int sinpi_1_9 = 5283;
|
||||
static const int sinpi_2_9 = 9929;
|
||||
static const int sinpi_3_9 = 13377;
|
||||
static const int sinpi_4_9 = 15212;
|
||||
#endif
|
||||
|
||||
static INLINE int dct_const_round_shift(int input) {
|
||||
int rv = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
|
||||
assert((rv <= INT16_MAX) && (rv >= INT16_MIN));
|
||||
|
@ -494,7 +494,6 @@ void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void idct4_1d(int16_t *input, int16_t *output) {
|
||||
int16_t step[4];
|
||||
int temp1, temp2;
|
||||
@ -651,6 +650,100 @@ void vp9_short_idct8x8_c(int16_t *input, int16_t *output, int pitch) {
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_INTHT4X4
|
||||
static void iadst4_1d(int16_t *input, int16_t *output) {
|
||||
int x0, x1, x2, x3;
|
||||
int s0, s1, s2, s3, s4, s5, s6, s7;
|
||||
|
||||
x0 = input[0];
|
||||
x1 = input[1];
|
||||
x2 = input[2];
|
||||
x3 = input[3];
|
||||
|
||||
if (!(x0 | x1 | x2 | x3)) {
|
||||
output[0] = output[1] = output[2] = output[3] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
s0 = sinpi_1_9 * x0;
|
||||
s1 = sinpi_2_9 * x0;
|
||||
s2 = sinpi_3_9 * x1;
|
||||
s3 = sinpi_4_9 * x2;
|
||||
s4 = sinpi_1_9 * x2;
|
||||
s5 = sinpi_2_9 * x3;
|
||||
s6 = sinpi_4_9 * x3;
|
||||
s7 = x0 - x2 + x3;
|
||||
|
||||
x0 = s0 + s3 + s5;
|
||||
x1 = s1 - s4 - s6;
|
||||
x2 = sinpi_3_9 * s7;
|
||||
x3 = s2;
|
||||
|
||||
s0 = x0 + x3;
|
||||
s1 = x1 + x3;
|
||||
s2 = x2;
|
||||
s3 = x0 + x1 - x3;
|
||||
|
||||
// 1-D transform scaling factor is sqrt(2).
|
||||
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
|
||||
// + 1b (addition) = 29b.
|
||||
// Hence the output bit depth is 15b.
|
||||
output[0] = dct_const_round_shift(s0);
|
||||
output[1] = dct_const_round_shift(s1);
|
||||
output[2] = dct_const_round_shift(s2);
|
||||
output[3] = dct_const_round_shift(s3);
|
||||
}
|
||||
|
||||
// 2-D inverse hybrid transform for a 4x4 block.
//
// input   - 16 coefficients, consumed as four consecutive rows of four.
// output  - destination for the 4x4 result; row j, column i is stored at
//           output[j * (pitch >> 1) + i].
// pitch   - destination stride; it is halved to index int16_t elements
//           (presumably a byte stride - confirm against callers).
// tx_type - selects which 1-D inverse transform (iadst4_1d or idct4_1d)
//           is applied to the rows and which to the columns.
//
// Each row is transformed first into a local 4x4 buffer; each column of that
// buffer is then transformed and the result is rounded and shifted right by
// 4 bits before being stored.
void vp9_short_iht4x4_c(int16_t *input, int16_t *output,
                        int pitch, TX_TYPE tx_type) {
  int16_t out[16];
  int16_t *outptr = &out[0];
  const int short_pitch = pitch >> 1;
  int i, j;
  int16_t temp_in[4], temp_out[4];

  // Start from the DCT/DCT pairing so the function pointers are never
  // indeterminate. Previously an unexpected tx_type in a build with NDEBUG
  // (assert compiled out) left both pointers uninitialized before the calls
  // below.
  void (*invr)(int16_t*, int16_t*) = &idct4_1d;
  void (*invc)(int16_t*, int16_t*) = &idct4_1d;

  switch (tx_type) {
    case ADST_ADST:
      invc = &iadst4_1d;
      invr = &iadst4_1d;
      break;
    case ADST_DCT:
      invc = &iadst4_1d;
      invr = &idct4_1d;
      break;
    case DCT_ADST:
      invc = &idct4_1d;
      invr = &iadst4_1d;
      break;
    case DCT_DCT:
      invc = &idct4_1d;
      invr = &idct4_1d;
      break;
    default:
      // Unknown transform type: trap in debug builds, otherwise continue
      // with the DCT/DCT defaults set above.
      assert(0);
      break;
  }

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    invr(input, outptr);
    input += 4;
    outptr += 4;
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    // Gather column i of the intermediate buffer into a contiguous vector.
    for (j = 0; j < 4; ++j)
      temp_in[j] = out[j * 4 + i];
    invc(temp_in, temp_out);
    // Round to nearest and drop 4 bits while scattering back to the column.
    for (j = 0; j < 4; ++j)
      output[j * short_pitch + i] = (temp_out[j] + 8) >> 4;
  }
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_INTHT
|
||||
static void iadst8_1d(int16_t *input, int16_t *output) {
|
||||
int x0, x1, x2, x3, x4, x5, x6, x7;
|
||||
@ -733,7 +826,7 @@ static void iadst8_1d(int16_t *input, int16_t *output) {
|
||||
}
|
||||
|
||||
void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
|
||||
TX_TYPE tx_type, int pitch) {
|
||||
int pitch, TX_TYPE tx_type) {
|
||||
int16_t out[8 * 8];
|
||||
int16_t *outptr = &out[0];
|
||||
const int short_pitch = pitch >> 1;
|
||||
|
@ -51,8 +51,13 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
|
||||
for (i = 0; i < 16; i++) {
|
||||
TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
|
||||
if (tx_type != DCT_DCT) {
|
||||
#if CONFIG_INTHT4X4
|
||||
vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff,
|
||||
32, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
|
||||
tx_type, 4, xd->block[i].eob);
|
||||
#endif
|
||||
} else {
|
||||
vp9_inverse_transform_b_4x4(xd, i, 32);
|
||||
}
|
||||
@ -93,7 +98,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
|
||||
if (tx_type != DCT_DCT) {
|
||||
#if CONFIG_INTHT
|
||||
vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff,
|
||||
tx_type, 32);
|
||||
32, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
|
||||
xd->block[i].eob);
|
||||
@ -108,7 +113,7 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
|
||||
if (tx_type != DCT_DCT) {
|
||||
#if CONFIG_INTHT
|
||||
vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
|
||||
tx_type, 32);
|
||||
32, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
|
||||
xd->block[i + 2].eob);
|
||||
|
@ -300,10 +300,15 @@ prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
|
||||
specialize vp9_short_idct1_32x32
|
||||
|
||||
#if CONFIG_INTHT
|
||||
prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int tx_type, int pitch"
|
||||
prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int pitch, int tx_type"
|
||||
specialize vp9_short_iht8x8
|
||||
#endif
|
||||
|
||||
#if CONFIG_INTHT4X4
|
||||
prototype void vp9_short_iht4x4 "int16_t *input, int16_t *output, int pitch, int tx_type"
|
||||
specialize vp9_short_iht4x4
|
||||
#endif
|
||||
|
||||
prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"
|
||||
specialize vp9_ihtllm
|
||||
|
||||
|
@ -69,7 +69,11 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
|
||||
input[i] = dq[i] * input[i];
|
||||
}
|
||||
|
||||
#if CONFIG_INTHT4X4
|
||||
vp9_short_iht4x4(input, output, 8, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs);
|
||||
#endif
|
||||
|
||||
vpx_memset(input, 0, 32);
|
||||
|
||||
@ -93,7 +97,7 @@ void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
|
||||
}
|
||||
|
||||
#if CONFIG_INTHT
|
||||
vp9_short_iht8x8(input, output, tx_type, 16);
|
||||
vp9_short_iht8x8(input, output, 16, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(input, output, 16, tx_type, 8, eobs);
|
||||
#endif
|
||||
|
@ -56,7 +56,11 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
|
||||
vp9_ht_quantize_b_4x4(be, b, tx_type);
|
||||
#if CONFIG_INTHT4X4
|
||||
vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
|
||||
#endif
|
||||
} else {
|
||||
x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4(be, b) ;
|
||||
@ -155,7 +159,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
|
||||
|
||||
#if CONFIG_INTHT
|
||||
vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
|
||||
tx_type, 32);
|
||||
32, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
|
||||
tx_type, 8, xd->block[idx].eob);
|
||||
@ -173,7 +177,11 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
|
||||
if (tx_type != DCT_DCT) {
|
||||
vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
|
||||
vp9_ht_quantize_b_4x4(be, b, tx_type);
|
||||
#if CONFIG_INTHT4X4
|
||||
vp9_short_iht4x4(b->dqcoeff, b->diff, 32, tx_type);
|
||||
#else
|
||||
vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
|
||||
#endif
|
||||
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
|
||||
x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
|
||||
x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
|
||||
|
@ -1170,7 +1170,11 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
|
||||
|
||||
// inverse transform
|
||||
if (best_tx_type != DCT_DCT)
|
||||
#if CONFIG_INTHT4X4
|
||||
vp9_short_iht4x4(best_dqcoeff, b->diff, 32, best_tx_type);
|
||||
#else
|
||||
vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
|
||||
#endif
|
||||
else
|
||||
xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user