Adding 8x16/16x8/32x16/16x32 transforms
Adds forward, inverse transforms and scan orders.

Change-Id: Iab6994f4b0ef65e660b714d111b79b1c8172d6a8
@@ -60,41 +60,41 @@ static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = {
   1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)
 };
 
-static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = { 1, 4,
-                                                                    16, 64,
+static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+  1, 4, 16, 64,
 #if CONFIG_EXT_TX
-  2, 2
+  2, 2, 8, 8, 32, 32
 #endif // CONFIG_EXT_TX
 };
-static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = { 1, 2,
-                                                                         4, 8,
+static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+  1, 2, 4, 8,
 #if CONFIG_EXT_TX
-  1, 2
+  1, 2, 2, 4, 4, 8
 #endif // CONFIG_EXT_TX
 };
-static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = { 1, 2,
-                                                                         4, 8,
+static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+  1, 2, 4, 8,
 #if CONFIG_EXT_TX
-  2, 1
+  2, 1, 4, 2, 8, 4
 #endif // CONFIG_EXT_TX
 };
 
-static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = { 0, 2,
-                                                                         4, 6,
+static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+  0, 2, 4, 6,
 #if CONFIG_EXT_TX
-  1, 1
+  1, 1, 3, 3, 5, 5
 #endif // CONFIG_EXT_TX
 };
 static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
   0, 1, 2, 3,
 #if CONFIG_EXT_TX
-  0, 1
+  0, 1, 1, 2, 2, 3
 #endif // CONFIG_EXT_TX
 };
 static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
   0, 1, 2, 3,
 #if CONFIG_EXT_TX
-  1, 0
+  1, 0, 2, 1, 3, 2
 #endif // CONFIG_EXT_TX
 };
 
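A quick consistency check on the extended lookup entries above: for every transform size the wide and high 4x4-block counts must multiply out to the total, and the three log2 tables must add up the same way. A standalone sketch, not part of the commit; the array contents are copied from the tables, assuming CONFIG_EXT_TX so all ten sizes are present:

    #include <assert.h>

    int main(void) {
      /* 4x4, 8x8, 16x16, 32x32, then 4x8, 8x4, 8x16, 16x8, 16x32, 32x16 */
      static const unsigned char total[10] = { 1, 4, 16, 64, 2, 2, 8, 8, 32, 32 };
      static const unsigned char wide[10] = { 1, 2, 4, 8, 1, 2, 2, 4, 4, 8 };
      static const unsigned char high[10] = { 1, 2, 4, 8, 2, 1, 4, 2, 8, 4 };
      int t;
      for (t = 0; t < 10; ++t)
        assert(wide[t] * high[t] == total[t]); /* e.g. TX_8X16: 2 * 4 == 8 */
      return 0;
    }
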
@@ -374,9 +374,13 @@ static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
   BLOCK_16X16, // TX_16X16
   BLOCK_32X32, // TX_32X32
 #if CONFIG_EXT_TX
-  BLOCK_4X8, // TX_4X8
-  BLOCK_8X4, // TX_8X4
-#endif // CONFIG_EXT_TX
+  BLOCK_4X8,   // TX_4X8
+  BLOCK_8X4,   // TX_8X4
+  BLOCK_8X16,  // TX_8X16
+  BLOCK_16X8,  // TX_16X8
+  BLOCK_16X32, // TX_16X32
+  BLOCK_32X16, // TX_32X16
+#endif // CONFIG_EXT_TX
 };
 
 static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
@@ -385,9 +389,13 @@ static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
   TX_16X16, // TX_16X16
   TX_32X32, // TX_32X32
 #if CONFIG_EXT_TX
-  TX_4X4, // TX_4X8
-  TX_4X4, // TX_8X4
-#endif // CONFIG_EXT_TX
+  TX_4X4,   // TX_4X8
+  TX_4X4,   // TX_8X4
+  TX_8X8,   // TX_8X16
+  TX_8X8,   // TX_16X8
+  TX_16X16, // TX_16X32
+  TX_16X16, // TX_32X16
+#endif // CONFIG_EXT_TX
 };
 
 static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
@@ -396,9 +404,13 @@ static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
   TX_16X16, // TX_16X16
   TX_32X32, // TX_32X32
 #if CONFIG_EXT_TX
-  TX_8X8, // TX_4X8
-  TX_8X8, // TX_8X4
-#endif // CONFIG_EXT_TX
+  TX_8X8,   // TX_4X8
+  TX_8X8,   // TX_8X4
+  TX_16X16, // TX_8X16
+  TX_16X16, // TX_16X8
+  TX_32X32, // TX_16X32
+  TX_32X32, // TX_32X16
+#endif // CONFIG_EXT_TX
 };
 
 static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
 
@@ -59,7 +59,12 @@ const uint16_t band_count_table[TX_SIZES_ALL][8] = {
   { 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
   { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
 #if CONFIG_EXT_TX
-  { 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 },
+  { 1, 2, 3, 4, 8, 32 - 18, 0 },
+  { 1, 2, 3, 4, 8, 32 - 18, 0 },
+  { 1, 2, 3, 4, 11, 128 - 21, 0 },
+  { 1, 2, 3, 4, 11, 128 - 21, 0 },
+  { 1, 2, 3, 4, 11, 512 - 21, 0 },
+  { 1, 2, 3, 4, 11, 512 - 21, 0 },
 #endif // CONFIG_EXT_TX
 };
 
@@ -67,7 +72,12 @@ const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
   { 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
   { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
 #if CONFIG_EXT_TX
-  { 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 },
+  { 0, 1, 3, 6, 10, 18, 32, 0 },
+  { 0, 1, 3, 6, 10, 18, 32, 0 },
+  { 0, 1, 3, 6, 10, 21, 128, 0 },
+  { 0, 1, 3, 6, 10, 21, 128, 0 },
+  { 0, 1, 3, 6, 10, 21, 512, 0 },
+  { 0, 1, 3, 6, 10, 21, 512, 0 },
 #endif // CONFIG_EXT_TX
 };
 
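Each row of band_count_table partitions one transform size's scan positions into coefficient bands, so its entries must sum to the coefficient count, and band_cum_count_table holds the corresponding running totals. Checking the new rows: 1 + 2 + 3 + 4 + 8 + (32 - 18) = 32 for 4x8/8x4, 1 + 2 + 3 + 4 + 11 + (128 - 21) = 128 for 8x16/16x8, and 1 + 2 + 3 + 4 + 11 + (512 - 21) = 512 for 16x32/32x16, matching the 8*16 and 16*32 coefficient counts; the cumulative rows end at the same totals.
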
@@ -116,7 +126,7 @@ const uint8_t vp10_coefband_trans_8x8plus[1024] = {
 };
 
 #if CONFIG_EXT_TX
-const uint8_t vp10_coefband_trans_8x4_4x8[32] = {
+const uint8_t vp10_coefband_trans_4x8_8x4[32] = {
   0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
   4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 };
 
@@ -156,7 +156,7 @@ void vp10_partial_adapt_probs(struct VP10Common *cm, int mi_row, int mi_col);
 
 DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
 #if CONFIG_EXT_TX
-DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x4_4x8[32]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x8_8x4[32]);
 #endif // CONFIG_EXT_TX
 DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);
 
@@ -169,7 +169,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
     case TX_4X4: return vp10_coefband_trans_4x4;
 #if CONFIG_EXT_TX
     case TX_4X8:
-    case TX_8X4: return vp10_coefband_trans_8x4_4x8;
+    case TX_8X4: return vp10_coefband_trans_4x8_8x4;
 #endif // CONFIG_EXT_TX
     default: return vp10_coefband_trans_8x8plus;
   }
@@ -228,6 +228,22 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
       above_ec = !!*(const uint16_t *)a;
       left_ec = l[0] != 0;
       break;
+    case TX_8X16:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+    case TX_16X8:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_16X32:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    case TX_32X16:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
 #endif // CONFIG_EXT_TX
     case TX_8X8:
       above_ec = !!*(const uint16_t *)a;
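Each ENTROPY_CONTEXT byte in these arrays covers one 4-sample edge unit, so a transform edge of 8, 16, or 32 samples is tested with a single 2-, 4-, or 8-byte load, which is all the new cases do. The same check written without type punning, a sketch with a hypothetical helper, not code from this commit:

    #include <stdint.h>
    #include <string.h>

    /* Nonzero if any context byte along the edge is set; edge_len is in
     * samples, and each byte tracks 4 samples. */
    static int edge_has_nonzero_ctx(const uint8_t *ctx, int edge_len) {
      uint64_t v = 0;
      memcpy(&v, ctx, edge_len / 4); /* 8 -> 2 bytes, 16 -> 4, 32 -> 8 */
      return v != 0;
    }
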
@@ -138,9 +138,13 @@ typedef uint8_t TX_SIZE;
 #define TX_SIZES ((TX_SIZE)4)
 
 #if CONFIG_EXT_TX
-#define TX_4X8 ((TX_SIZE)4)        // 4x8 transform
-#define TX_8X4 ((TX_SIZE)5)        // 8x4 transform
-#define TX_SIZES_ALL ((TX_SIZE)6)  // Includes rectangular transforms
+#define TX_4X8 ((TX_SIZE)4)         // 4x8 transform
+#define TX_8X4 ((TX_SIZE)5)         // 8x4 transform
+#define TX_8X16 ((TX_SIZE)6)        // 8x16 transform
+#define TX_16X8 ((TX_SIZE)7)        // 16x8 transform
+#define TX_16X32 ((TX_SIZE)8)       // 16x32 transform
+#define TX_32X16 ((TX_SIZE)9)       // 32x16 transform
+#define TX_SIZES_ALL ((TX_SIZE)10)  // Includes rectangular transforms
 #else
 #define TX_SIZES_ALL ((TX_SIZE)4)
 #endif // CONFIG_EXT_TX
 
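With this layout, a transform size's pixel dimensions follow from the common_data.h log2 tables shown earlier: width is 4 << num_4x4_blocks_wide_txsize_log2_lookup[tx] and height is 4 << num_4x4_blocks_high_txsize_log2_lookup[tx], so TX_16X32 yields 4 << 2 = 16 wide by 4 << 3 = 32 high.
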
@@ -540,6 +540,7 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
   }
 }
 
+#if CONFIG_EXT_TX
 void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
   static const transform_2d IHT_4x8[] = {
@@ -547,9 +548,8 @@ void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     { iadst8_c, idct4_c },  // ADST_DCT
     { idct8_c, iadst4_c },  // DCT_ADST
     { iadst8_c, iadst4_c }, // ADST_ADST
-#if CONFIG_EXT_TX
-    { iadst8_c, idct4_c },  // FLIPADST_DCT
-    { idct8_c, iadst4_c },  // DCT_FLIPADST
+    { iadst8_c, idct4_c },  // FLIPADST_DCT
+    { idct8_c, iadst4_c },  // DCT_FLIPADST
     { iadst8_c, iadst4_c }, // FLIPADST_FLIPADST
     { iadst8_c, iadst4_c }, // ADST_FLIPADST
     { iadst8_c, iadst4_c }, // FLIPADST_ADST
@@ -560,34 +560,33 @@ void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     { iidtx8_c, iadst4_c }, // H_ADST
     { iadst8_c, iidtx4_c }, // V_FLIPADST
     { iidtx8_c, iadst4_c }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
   };
 
+  const int n = 4;
+  const int n2 = 8;
   int i, j;
   tran_low_t out[4][8], outtmp[4];
   tran_low_t *outp = &out[0][0];
-  int outstride = 8;
+  int outstride = n2;
 
   // inverse transform row vectors and transpose
-  for (i = 0; i < 8; ++i) {
+  for (i = 0; i < n2; ++i) {
     IHT_4x8[tx_type].rows(input, outtmp);
-    for (j = 0; j < 4; ++j)
+    for (j = 0; j < n; ++j)
       out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
-    input += 4;
+    input += n;
   }
 
   // inverse transform column vectors
-  for (i = 0; i < 4; ++i) {
+  for (i = 0; i < n; ++i) {
     IHT_4x8[tx_type].cols(out[i], out[i]);
   }
 
-#if CONFIG_EXT_TX
-  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
-#endif
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
 
   // Sum with the destination
-  for (i = 0; i < 8; ++i) {
-    for (j = 0; j < 4; ++j) {
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
       int d = i * stride + j;
       int s = j * outstride + i;
       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
@@ -602,9 +601,8 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     { iadst4_c, idct8_c },  // ADST_DCT
     { idct4_c, iadst8_c },  // DCT_ADST
     { iadst4_c, iadst8_c }, // ADST_ADST
-#if CONFIG_EXT_TX
-    { iadst4_c, idct8_c },  // FLIPADST_DCT
-    { idct4_c, iadst8_c },  // DCT_FLIPADST
+    { iadst4_c, idct8_c },  // FLIPADST_DCT
+    { idct4_c, iadst8_c },  // DCT_FLIPADST
     { iadst4_c, iadst8_c }, // FLIPADST_FLIPADST
     { iadst4_c, iadst8_c }, // ADST_FLIPADST
     { iadst4_c, iadst8_c }, // FLIPADST_ADST
@@ -615,34 +613,33 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     { iidtx4_c, iadst8_c }, // H_ADST
     { iadst4_c, iidtx8_c }, // V_FLIPADST
     { iidtx4_c, iadst8_c }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
   };
+  const int n = 4;
+  const int n2 = 8;
 
   int i, j;
   tran_low_t out[8][4], outtmp[8];
   tran_low_t *outp = &out[0][0];
-  int outstride = 4;
+  int outstride = n;
 
   // inverse transform row vectors and transpose
-  for (i = 0; i < 4; ++i) {
+  for (i = 0; i < n; ++i) {
     IHT_8x4[tx_type].rows(input, outtmp);
-    for (j = 0; j < 8; ++j)
+    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
-    input += 8;
+    input += n2;
  }
 
   // inverse transform column vectors
-  for (i = 0; i < 8; ++i) {
+  for (i = 0; i < n2; ++i) {
     IHT_8x4[tx_type].cols(out[i], out[i]);
   }
 
-#if CONFIG_EXT_TX
-  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 8);
-#endif
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
 
   // Sum with the destination
-  for (i = 0; i < 4; ++i) {
-    for (j = 0; j < 8; ++j) {
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
       int d = i * stride + j;
       int s = j * outstride + i;
       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
@@ -650,6 +647,219 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
   }
 }
 
+void vp10_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest,
+                            int stride, int tx_type) {
+  static const transform_2d IHT_8x16[] = {
+    { idct16_c, idct8_c },   // DCT_DCT
+    { iadst16_c, idct8_c },  // ADST_DCT
+    { idct16_c, iadst8_c },  // DCT_ADST
+    { iadst16_c, iadst8_c }, // ADST_ADST
+    { iadst16_c, idct8_c },  // FLIPADST_DCT
+    { idct16_c, iadst8_c },  // DCT_FLIPADST
+    { iadst16_c, iadst8_c }, // FLIPADST_FLIPADST
+    { iadst16_c, iadst8_c }, // ADST_FLIPADST
+    { iadst16_c, iadst8_c }, // FLIPADST_ADST
+    { iidtx16_c, iidtx8_c }, // IDTX
+    { idct16_c, iidtx8_c },  // V_DCT
+    { iidtx16_c, idct8_c },  // H_DCT
+    { iadst16_c, iidtx8_c }, // V_ADST
+    { iidtx16_c, iadst8_c }, // H_ADST
+    { iadst16_c, iidtx8_c }, // V_FLIPADST
+    { iidtx16_c, iadst8_c }, // H_FLIPADST
+  };
+
+  const int n = 8;
+  const int n2 = 16;
+  int i, j;
+  tran_low_t out[8][16], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n2; ++i) {
+    IHT_8x16[tx_type].rows(input, outtmp);
+    for (j = 0; j < n; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    IHT_8x16[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+void vp10_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest,
+                            int stride, int tx_type) {
+  static const transform_2d IHT_16x8[] = {
+    { idct8_c, idct16_c },   // DCT_DCT
+    { iadst8_c, idct16_c },  // ADST_DCT
+    { idct8_c, iadst16_c },  // DCT_ADST
+    { iadst8_c, iadst16_c }, // ADST_ADST
+    { iadst8_c, idct16_c },  // FLIPADST_DCT
+    { idct8_c, iadst16_c },  // DCT_FLIPADST
+    { iadst8_c, iadst16_c }, // FLIPADST_FLIPADST
+    { iadst8_c, iadst16_c }, // ADST_FLIPADST
+    { iadst8_c, iadst16_c }, // FLIPADST_ADST
+    { iidtx8_c, iidtx16_c }, // IDTX
+    { idct8_c, iidtx16_c },  // V_DCT
+    { iidtx8_c, idct16_c },  // H_DCT
+    { iadst8_c, iidtx16_c }, // V_ADST
+    { iidtx8_c, iadst16_c }, // H_ADST
+    { iadst8_c, iidtx16_c }, // V_FLIPADST
+    { iidtx8_c, iadst16_c }, // H_FLIPADST
+  };
+  const int n = 8;
+  const int n2 = 16;
+
+  int i, j;
+  tran_low_t out[16][8], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n; ++i) {
+    IHT_16x8[tx_type].rows(input, outtmp);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    IHT_16x8[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+void vp10_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest,
+                             int stride, int tx_type) {
+  static const transform_2d IHT_16x32[] = {
+    { idct32_c, idct16_c },        // DCT_DCT
+    { ihalfright32_c, idct16_c },  // ADST_DCT
+    { idct32_c, iadst16_c },       // DCT_ADST
+    { ihalfright32_c, iadst16_c }, // ADST_ADST
+    { ihalfright32_c, idct16_c },  // FLIPADST_DCT
+    { idct32_c, iadst16_c },       // DCT_FLIPADST
+    { ihalfright32_c, iadst16_c }, // FLIPADST_FLIPADST
+    { ihalfright32_c, iadst16_c }, // ADST_FLIPADST
+    { ihalfright32_c, iadst16_c }, // FLIPADST_ADST
+    { iidtx32_c, iidtx16_c },      // IDTX
+    { idct32_c, iidtx16_c },       // V_DCT
+    { iidtx32_c, idct16_c },       // H_DCT
+    { ihalfright32_c, iidtx16_c }, // V_ADST
+    { iidtx32_c, iadst16_c },      // H_ADST
+    { ihalfright32_c, iidtx16_c }, // V_FLIPADST
+    { iidtx32_c, iadst16_c },      // H_FLIPADST
+  };
+
+  const int n = 16;
+  const int n2 = 32;
+  int i, j;
+  tran_low_t out[16][32], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n2; ++i) {
+    IHT_16x32[tx_type].rows(input, outtmp);
+    for (j = 0; j < n; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    IHT_16x32[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+void vp10_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest,
+                             int stride, int tx_type) {
+  static const transform_2d IHT_32x16[] = {
+    { idct16_c, idct32_c },        // DCT_DCT
+    { iadst16_c, idct32_c },       // ADST_DCT
+    { idct16_c, ihalfright32_c },  // DCT_ADST
+    { iadst16_c, ihalfright32_c }, // ADST_ADST
+    { iadst16_c, idct32_c },       // FLIPADST_DCT
+    { idct16_c, ihalfright32_c },  // DCT_FLIPADST
+    { iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST
+    { iadst16_c, ihalfright32_c }, // ADST_FLIPADST
+    { iadst16_c, ihalfright32_c }, // FLIPADST_ADST
+    { iidtx16_c, iidtx32_c },      // IDTX
+    { idct16_c, iidtx32_c },       // V_DCT
+    { iidtx16_c, idct32_c },       // H_DCT
+    { iadst16_c, iidtx32_c },      // V_ADST
+    { iidtx16_c, ihalfright32_c }, // H_ADST
+    { iadst16_c, iidtx32_c },      // V_FLIPADST
+    { iidtx16_c, ihalfright32_c }, // H_FLIPADST
+  };
+  const int n = 16;
+  const int n2 = 32;
+
+  int i, j;
+  tran_low_t out[32][16], outtmp[32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors and transpose
+  for (i = 0; i < n; ++i) {
+    IHT_32x16[tx_type].rows(input, outtmp);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    IHT_32x16[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif // CONFIG_EXT_TX
 
 void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
   static const transform_2d IHT_8[] = {
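All four new inverse transforms follow the 4x8/8x4 template: length-n2 row transforms with a transpose, length-n column transforms, then a clipped add into the prediction with a final round shift (5 bits for the 32-coefficient sizes, 6 bits for the 128- and 512-coefficient ones). The Sqrt2 multiply on the intermediate keeps the combined gain of the unequal row/column passes at a power of two so that final shift can undo it. A minimal standalone sketch of the fixed-point step, assuming Sqrt2 is sqrt(2) in Q14 to match the 14-bit rounding shift inside dct_const_round_shift (the constant's actual definition lives in a header outside this diff):

    #include <stdint.h>

    #define DCT_CONST_BITS 14
    #define SQRT2_Q14 23170 /* round(sqrt(2) * (1 << 14)); assumed value */

    /* out = round(x * sqrt(2)), as in dct_const_round_shift(x * Sqrt2) */
    static int32_t mul_sqrt2(int32_t x) {
      int64_t t = (int64_t)x * SQRT2_Q14;
      return (int32_t)((t + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS);
    }
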
@@ -930,16 +1140,40 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
 }
 
 #if CONFIG_EXT_TX
-void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
-                           int eob, TX_TYPE tx_type) {
-  (void)eob;
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+                           int stride, int eob, TX_TYPE tx_type) {
+  (void) eob;
+  vp10_iht4x8_32_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+                           int stride, int eob, TX_TYPE tx_type) {
+  (void) eob;
   vp10_iht8x4_32_add(input, dest, stride, tx_type);
 }
 
-void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
-                           int eob, TX_TYPE tx_type) {
-  (void)eob;
-  vp10_iht4x8_32_add(input, dest, stride, tx_type);
+void vp10_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
+                            int stride, int eob, TX_TYPE tx_type) {
+  (void) eob;
+  vp10_iht8x16_128_add(input, dest, stride, tx_type);
 }
 
+void vp10_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
+                            int stride, int eob, TX_TYPE tx_type) {
+  (void) eob;
+  vp10_iht16x8_128_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
+                             int stride, int eob, TX_TYPE tx_type) {
+  (void) eob;
+  vp10_iht16x32_512_add(input, dest, stride, tx_type);
+}
+
+void vp10_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
+                             int stride, int eob, TX_TYPE tx_type) {
+  (void) eob;
+  vp10_iht32x16_512_add(input, dest, stride, tx_type);
+}
 #endif // CONFIG_EXT_TX
 
@@ -1116,34 +1350,36 @@ void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
     { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST
     { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_FLIPADST
   };
+  const int n = 4;
+  const int n2 = 8;
 
   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
 
   int i, j;
   tran_low_t out[4][8], outtmp[4];
   tran_low_t *outp = &out[0][0];
-  int outstride = 8;
+  int outstride = n2;
 
   // inverse transform row vectors, and transpose
-  for (i = 0; i < 8; ++i) {
+  for (i = 0; i < n2; ++i) {
     HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
-    for (j = 0; j < 4; ++j) {
-      out[j][i] =
-          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    for (j = 0; j < n; ++j) {
+      out[j][i] = HIGHBD_WRAPLOW(
+          highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
     }
-    input += 4;
+    input += n;
   }
 
   // inverse transform column vectors
-  for (i = 0; i < 4; ++i) {
+  for (i = 0; i < n; ++i) {
     HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
   }
 
-  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 4);
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
 
   // Sum with the destination
-  for (i = 0; i < 8; ++i) {
-    for (j = 0; j < 4; ++j) {
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
       int d = i * stride + j;
       int s = j * outstride + i;
       dest[d] =
@@ -1172,34 +1408,36 @@ void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
     { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST
     { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_FLIPADST
   };
+  const int n = 4;
+  const int n2 = 8;
 
   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
 
   int i, j;
   tran_low_t out[8][4], outtmp[8];
   tran_low_t *outp = &out[0][0];
-  int outstride = 4;
+  int outstride = n;
 
   // inverse transform row vectors, and transpose
-  for (i = 0; i < 4; ++i) {
+  for (i = 0; i < n; ++i) {
     HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
-    for (j = 0; j < 8; ++j) {
-      out[j][i] =
-          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    for (j = 0; j < n2; ++j) {
+      out[j][i] = HIGHBD_WRAPLOW(
+          highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
     }
-    input += 8;
+    input += n2;
   }
 
   // inverse transform column vectors
-  for (i = 0; i < 8; ++i) {
+  for (i = 0; i < n2; ++i) {
     HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
   }
 
-  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 8);
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
 
   // Sum with the destination
-  for (i = 0; i < 4; ++i) {
-    for (j = 0; j < 8; ++j) {
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
       int d = i * stride + j;
       int s = j * outstride + i;
       dest[d] =
@@ -1207,6 +1445,234 @@ void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
     }
   }
 }
 
+void vp10_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_8x16[] = {
+    { vpx_highbd_idct16_c, vpx_highbd_idct8_c },   // DCT_DCT
+    { vpx_highbd_iadst16_c, vpx_highbd_idct8_c },  // ADST_DCT
+    { vpx_highbd_idct16_c, vpx_highbd_iadst8_c },  // DCT_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // ADST_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_idct8_c },  // FLIPADST_DCT
+    { vpx_highbd_idct16_c, vpx_highbd_iadst8_c },  // DCT_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+    { highbd_iidtx16_c, highbd_iidtx8_c },         // IDTX
+    { vpx_highbd_idct16_c, highbd_iidtx8_c },      // V_DCT
+    { highbd_iidtx16_c, vpx_highbd_idct8_c },      // H_DCT
+    { vpx_highbd_iadst16_c, highbd_iidtx8_c },     // V_ADST
+    { highbd_iidtx16_c, vpx_highbd_iadst8_c },     // H_ADST
+    { vpx_highbd_iadst16_c, highbd_iidtx8_c },     // V_FLIPADST
+    { highbd_iidtx16_c, vpx_highbd_iadst8_c },     // H_FLIPADST
+  };
+  const int n = 8;
+  const int n2 = 16;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[8][16], outtmp[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n; ++j)
+      out[j][i] = HIGHBD_WRAPLOW(
+          highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_8x16[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = highbd_clip_pixel_add(dest[d],
+                                      ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_16x8[] = {
+    { vpx_highbd_idct8_c, vpx_highbd_idct16_c },   // DCT_DCT
+    { vpx_highbd_iadst8_c, vpx_highbd_idct16_c },  // ADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst16_c },  // DCT_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // ADST_ADST
+    { vpx_highbd_iadst8_c, vpx_highbd_idct16_c },  // FLIPADST_DCT
+    { vpx_highbd_idct8_c, vpx_highbd_iadst16_c },  // DCT_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+    { highbd_iidtx8_c, highbd_iidtx16_c },         // IDTX
+    { vpx_highbd_idct8_c, highbd_iidtx16_c },      // V_DCT
+    { highbd_iidtx8_c, vpx_highbd_idct16_c },      // H_DCT
+    { vpx_highbd_iadst8_c, highbd_iidtx16_c },     // V_ADST
+    { highbd_iidtx8_c, vpx_highbd_iadst16_c },     // H_ADST
+    { vpx_highbd_iadst8_c, highbd_iidtx16_c },     // V_FLIPADST
+    { highbd_iidtx8_c, vpx_highbd_iadst16_c },     // H_FLIPADST
+  };
+  const int n = 8;
+  const int n2 = 16;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[16][8], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = HIGHBD_WRAPLOW(
+          highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_16x8[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = highbd_clip_pixel_add(dest[d],
+                                      ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_16x32[] = {
+    { vpx_highbd_idct32_c, vpx_highbd_idct16_c },    // DCT_DCT
+    { highbd_ihalfright32_c, vpx_highbd_idct16_c },  // ADST_DCT
+    { vpx_highbd_idct32_c, vpx_highbd_iadst16_c },   // DCT_ADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // ADST_ADST
+    { highbd_ihalfright32_c, vpx_highbd_idct16_c },  // FLIPADST_DCT
+    { vpx_highbd_idct32_c, vpx_highbd_iadst16_c },   // DCT_FLIPADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+    { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+    { highbd_iidtx32_c, highbd_iidtx16_c },          // IDTX
+    { vpx_highbd_idct32_c, highbd_iidtx16_c },       // V_DCT
+    { highbd_iidtx32_c, vpx_highbd_idct16_c },       // H_DCT
+    { highbd_ihalfright32_c, highbd_iidtx16_c },     // V_ADST
+    { highbd_iidtx32_c, vpx_highbd_iadst16_c },      // H_ADST
+    { highbd_ihalfright32_c, highbd_iidtx16_c },     // V_FLIPADST
+    { highbd_iidtx32_c, vpx_highbd_iadst16_c },      // H_FLIPADST
+  };
+  const int n = 16;
+  const int n2 = 32;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[16][32], outtmp[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n2;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n; ++j)
+      out[j][i] = HIGHBD_WRAPLOW(
+          highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_16x32[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+  // Sum with the destination
+  for (i = 0; i < n2; ++i) {
+    for (j = 0; j < n; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = highbd_clip_pixel_add(dest[d],
+                                      ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_32x16[] = {
+    { vpx_highbd_idct16_c, vpx_highbd_idct32_c },    // DCT_DCT
+    { vpx_highbd_iadst16_c, vpx_highbd_idct32_c },   // ADST_DCT
+    { vpx_highbd_idct16_c, highbd_ihalfright32_c },  // DCT_ADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // ADST_ADST
+    { vpx_highbd_iadst16_c, vpx_highbd_idct32_c },   // FLIPADST_DCT
+    { vpx_highbd_idct16_c, highbd_ihalfright32_c },  // DCT_FLIPADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+    { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+    { highbd_iidtx16_c, highbd_iidtx32_c },          // IDTX
+    { vpx_highbd_idct16_c, highbd_iidtx32_c },       // V_DCT
+    { highbd_iidtx16_c, vpx_highbd_idct32_c },       // H_DCT
+    { vpx_highbd_iadst16_c, highbd_iidtx32_c },      // V_ADST
+    { highbd_iidtx16_c, highbd_ihalfright32_c },     // H_ADST
+    { vpx_highbd_iadst16_c, highbd_iidtx32_c },      // V_FLIPADST
+    { highbd_iidtx16_c, highbd_ihalfright32_c },     // H_FLIPADST
+  };
+  const int n = 16;
+  const int n2 = 32;
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t out[32][16], outtmp[32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = n;
+
+  // inverse transform row vectors, and transpose
+  for (i = 0; i < n; ++i) {
+    HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
+    for (j = 0; j < n2; ++j)
+      out[j][i] = HIGHBD_WRAPLOW(
+          highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+    input += n2;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < n2; ++i) {
+    HIGH_IHT_32x16[tx_type].cols(out[i], out[i], bd);
+  }
+
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+  // Sum with the destination
+  for (i = 0; i < n; ++i) {
+    for (j = 0; j < n2; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = highbd_clip_pixel_add(dest[d],
+                                      ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+#endif // CONFIG_EXT_TX
 
 void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1511,19 +1977,47 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
 }
 
 #if CONFIG_EXT_TX
-void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
-                                  int stride, int eob, int bd,
-                                  TX_TYPE tx_type) {
-  (void)eob;
-  vp10_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
-}
-
 void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
-  (void)eob;
+  (void) eob;
   vp10_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
 }
 
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  (void) eob;
+  vp10_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
+                                   int stride, int eob, int bd,
+                                   TX_TYPE tx_type) {
+  (void) eob;
+  vp10_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
+                                   int stride, int eob, int bd,
+                                   TX_TYPE tx_type) {
+  (void) eob;
+  vp10_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  (void) eob;
+  vp10_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
+}
+
+void vp10_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  (void) eob;
+  vp10_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
+}
 #endif // CONFIG_EXT_TX
 
 void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
@@ -1661,6 +2155,18 @@ void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
     case TX_8X4:
       vp10_inv_txfm_add_8x4(input, dest, stride, eob, tx_type);
       break;
+    case TX_8X16:
+      vp10_inv_txfm_add_8x16(input, dest, stride, eob, tx_type);
+      break;
+    case TX_16X8:
+      vp10_inv_txfm_add_16x8(input, dest, stride, eob, tx_type);
+      break;
+    case TX_16X32:
+      vp10_inv_txfm_add_16x32(input, dest, stride, eob, tx_type);
+      break;
+    case TX_32X16:
+      vp10_inv_txfm_add_32x16(input, dest, stride, eob, tx_type);
+      break;
 #endif // CONFIG_EXT_TX
     case TX_4X4:
       // this is like vp10_short_idct4x4 but has a special case around eob<=1
@@ -1698,6 +2204,18 @@ void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
     case TX_8X4:
       vp10_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
       break;
+    case TX_8X16:
+      vp10_highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_16X8:
+      vp10_highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_16X32:
+      vp10_highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
+      break;
+    case TX_32X16:
+      vp10_highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
+      break;
 #endif // CONFIG_EXT_TX
     case TX_4X4:
       // this is like vp10_short_idct4x4 but has a special case around eob<=1

vp10/common/scan.c: 1765 lines changed (diff suppressed because it is too large).

@@ -60,11 +60,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht4x4_16_add/;
 
+  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht4x8_32_add/;
+
   add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x4_32_add/;
 
-  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
-  specialize qw/vp10_iht4x8_32_add/;
-
+  add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht8x16_128_add/;
+
+  add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x8_128_add/;
+
+  add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x32_512_add/;
+
+  add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht32x16_512_add/;
+
   add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x8_64_add/;
@@ -126,11 +138,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht4x4_16_add sse2/;
 
+  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht4x8_32_add/;
+
   add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x4_32_add/;
 
-  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
-  specialize qw/vp10_iht4x8_32_add/;
-
+  add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht8x16_128_add/;
+
+  add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x8_128_add/;
+
+  add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x32_512_add/;
+
+  add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht32x16_512_add/;
+
   add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x8_64_add sse2/;
@@ -195,11 +219,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht4x4_16_add/;
 
+  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht4x8_32_add/;
+
   add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x4_32_add/;
 
-  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
-  specialize qw/vp10_iht4x8_32_add/;
-
+  add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht8x16_128_add/;
+
+  add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x8_128_add/;
+
+  add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x32_512_add/;
+
+  add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht32x16_512_add/;
+
   add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x8_64_add/;
@@ -237,11 +273,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht4x4_16_add sse2 neon dspr2/;
 
+  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht4x8_32_add/;
+
   add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x4_32_add/;
 
-  add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
-  specialize qw/vp10_iht4x8_32_add/;
-
+  add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht8x16_128_add/;
+
+  add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x8_128_add/;
+
+  add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht16x32_512_add/;
+
+  add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+  specialize qw/vp10_iht32x16_512_add/;
+
   add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
   specialize qw/vp10_iht8x8_64_add sse2 neon dspr2/;
@@ -382,11 +430,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
   specialize qw/vp10_highbd_iht4x4_16_add/;
 
+  add_proto qw/void vp10_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht4x8_32_add/;
+
   add_proto qw/void vp10_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
   specialize qw/vp10_highbd_iht8x4_32_add/;
 
-  add_proto qw/void vp10_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
-  specialize qw/vp10_highbd_iht4x8_32_add/;
-
+  add_proto qw/void vp10_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht8x16_128_add/;
+
+  add_proto qw/void vp10_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht16x8_128_add/;
+
+  add_proto qw/void vp10_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht16x32_512_add/;
+
+  add_proto qw/void vp10_highbd_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+  specialize qw/vp10_highbd_iht32x16_512_add/;
+
   add_proto qw/void vp10_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
   specialize qw/vp10_highbd_iht8x8_64_add/;
@@ -439,11 +499,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht4x4 sse2/;
 
+  add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht4x8/;
+
   add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht8x4/;
 
-  add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-  specialize qw/vp10_fht4x8/;
-
+  add_proto qw/void vp10_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x16/;
+
+  add_proto qw/void vp10_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x8/;
+
+  add_proto qw/void vp10_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x32/;
+
+  add_proto qw/void vp10_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x16/;
+
   add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht8x8 sse2/;
@@ -460,11 +532,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht4x4 sse2/;
 
+  add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht4x8/;
+
   add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht8x4/;
 
-  add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-  specialize qw/vp10_fht4x8/;
-
+  add_proto qw/void vp10_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht8x16/;
+
+  add_proto qw/void vp10_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x8/;
+
+  add_proto qw/void vp10_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht16x32/;
+
+  add_proto qw/void vp10_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_fht32x16/;
+
   add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_fht8x8 sse2/;
@@ -743,11 +827,23 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_highbd_fht4x4 sse4_1/;
 
+  add_proto qw/void vp10_highbd_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht4x8/;
+
   add_proto qw/void vp10_highbd_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_highbd_fht8x4/;
 
-  add_proto qw/void vp10_highbd_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-  specialize qw/vp10_highbd_fht4x8/;
-
+  add_proto qw/void vp10_highbd_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht8x16/;
+
+  add_proto qw/void vp10_highbd_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht16x8/;
+
+  add_proto qw/void vp10_highbd_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht16x32/;
+
+  add_proto qw/void vp10_highbd_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/vp10_highbd_fht32x16/;
+
   add_proto qw/void vp10_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
   specialize qw/vp10_highbd_fht8x8/;
 
@@ -1251,6 +1251,82 @@ static const transform_2d FHT_8x4[] = {
   { fadst4, fidtx8 }, // V_FLIPADST
   { fidtx4, fadst8 }, // H_FLIPADST
 };
 
+static const transform_2d FHT_8x16[] = {
+  { fdct16, fdct8 },   // DCT_DCT
+  { fadst16, fdct8 },  // ADST_DCT
+  { fdct16, fadst8 },  // DCT_ADST
+  { fadst16, fadst8 }, // ADST_ADST
+  { fadst16, fdct8 },  // FLIPADST_DCT
+  { fdct16, fadst8 },  // DCT_FLIPADST
+  { fadst16, fadst8 }, // FLIPADST_FLIPADST
+  { fadst16, fadst8 }, // ADST_FLIPADST
+  { fadst16, fadst8 }, // FLIPADST_ADST
+  { fidtx16, fidtx8 }, // IDTX
+  { fdct16, fidtx8 },  // V_DCT
+  { fidtx16, fdct8 },  // H_DCT
+  { fadst16, fidtx8 }, // V_ADST
+  { fidtx16, fadst8 }, // H_ADST
+  { fadst16, fidtx8 }, // V_FLIPADST
+  { fidtx16, fadst8 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_16x8[] = {
+  { fdct8, fdct16 },   // DCT_DCT
+  { fadst8, fdct16 },  // ADST_DCT
+  { fdct8, fadst16 },  // DCT_ADST
+  { fadst8, fadst16 }, // ADST_ADST
+  { fadst8, fdct16 },  // FLIPADST_DCT
+  { fdct8, fadst16 },  // DCT_FLIPADST
+  { fadst8, fadst16 }, // FLIPADST_FLIPADST
+  { fadst8, fadst16 }, // ADST_FLIPADST
+  { fadst8, fadst16 }, // FLIPADST_ADST
+  { fidtx8, fidtx16 }, // IDTX
+  { fdct8, fidtx16 },  // V_DCT
+  { fidtx8, fdct16 },  // H_DCT
+  { fadst8, fidtx16 }, // V_ADST
+  { fidtx8, fadst16 }, // H_ADST
+  { fadst8, fidtx16 }, // V_FLIPADST
+  { fidtx8, fadst16 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_16x32[] = {
+  { fdct32, fdct16 },        // DCT_DCT
+  { fhalfright32, fdct16 },  // ADST_DCT
+  { fdct32, fadst16 },       // DCT_ADST
+  { fhalfright32, fadst16 }, // ADST_ADST
+  { fhalfright32, fdct16 },  // FLIPADST_DCT
+  { fdct32, fadst16 },       // DCT_FLIPADST
+  { fhalfright32, fadst16 }, // FLIPADST_FLIPADST
+  { fhalfright32, fadst16 }, // ADST_FLIPADST
+  { fhalfright32, fadst16 }, // FLIPADST_ADST
+  { fidtx32, fidtx16 },      // IDTX
+  { fdct32, fidtx16 },       // V_DCT
+  { fidtx32, fdct16 },       // H_DCT
+  { fhalfright32, fidtx16 }, // V_ADST
+  { fidtx32, fadst16 },      // H_ADST
+  { fhalfright32, fidtx16 }, // V_FLIPADST
+  { fidtx32, fadst16 },      // H_FLIPADST
+};
+
+static const transform_2d FHT_32x16[] = {
+  { fdct16, fdct32 },        // DCT_DCT
+  { fadst16, fdct32 },       // ADST_DCT
+  { fdct16, fhalfright32 },  // DCT_ADST
+  { fadst16, fhalfright32 }, // ADST_ADST
+  { fadst16, fdct32 },       // FLIPADST_DCT
+  { fdct16, fhalfright32 },  // DCT_FLIPADST
+  { fadst16, fhalfright32 }, // FLIPADST_FLIPADST
+  { fadst16, fhalfright32 }, // ADST_FLIPADST
+  { fadst16, fhalfright32 }, // FLIPADST_ADST
+  { fidtx16, fidtx32 },      // IDTX
+  { fdct16, fidtx32 },       // V_DCT
+  { fidtx16, fdct32 },       // H_DCT
+  { fadst16, fidtx32 },      // V_ADST
+  { fidtx16, fhalfright32 }, // H_ADST
+  { fadst16, fidtx32 },      // V_FLIPADST
+  { fidtx16, fhalfright32 }, // H_FLIPADST
+};
 #endif // CONFIG_EXT_TX
 
 void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
@@ -1299,10 +1375,12 @@ void vp10_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
 
   // Columns
   for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = input[j * stride + i] * 8;
+    for (j = 0; j < n2; ++j)
+      temp_in[j] = (tran_low_t)fdct_round_shift(
+          input[j * stride + i] * 8 * Sqrt2);
     ht.cols(temp_in, temp_out);
     for (j = 0; j < n2; ++j)
-      out[j * n + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
+      out[j * n + i] = temp_out[j];
   }
 
   // Rows
@@ -1327,10 +1405,12 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
 
   // Columns
   for (i = 0; i < n2; ++i) {
-    for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 8;
+    for (j = 0; j < n; ++j)
+      temp_in[j] = (tran_low_t)fdct_round_shift(
+          input[j * stride + i] * 8 * Sqrt2);
     ht.cols(temp_in, temp_out);
     for (j = 0; j < n; ++j)
-      out[j * n2 + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
+      out[j * n2 + i] = temp_out[j];
   }
 
   // Rows
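Both hunks move the sqrt(2) factor from the output of the column transform to its input: the old code scaled the column results with fdct_round_shift(temp_out[j] * Sqrt2), while the new code folds Sqrt2 into the input scaling (input * 8 * Sqrt2) before ht.cols runs. The nominal overall scale is unchanged; the fixed-point rounding step now happens at input scale, before the column transform amplifies the values.
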
@@ -1341,6 +1421,137 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
|
||||
}
|
||||
// Note: overall scale factor of transform is 8 times unitary
|
||||
}
|
||||
|
||||
void vp10_fht8x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 8;
const int n2 = 16;
tran_low_t out[16 * 8];
tran_low_t temp_in[16], temp_out[16];
int i, j;
const transform_2d ht = FHT_8x16[tx_type];
int16_t flipped_input[16 * 8];
maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);

// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
out[j * n + i] = temp_out[j];
}

// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] = (temp_out[j] + 1) >> 1;
}
// Note: overall scale factor of transform is 8 times unitary
}

void vp10_fht16x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 8;
const int n2 = 16;
tran_low_t out[16 * 8];
tran_low_t temp_in[16], temp_out[16];
int i, j;
const transform_2d ht = FHT_16x8[tx_type];
int16_t flipped_input[16 * 8];
maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);

// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = temp_out[j];
}

// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] = (temp_out[j] + 1) >> 1;
}
// Note: overall scale factor of transform is 8 times unitary
}
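Both functions above follow the same two-pass shape: each column (length n2) is transformed and its results written down column i of an n-wide raster (out[j * n + i]), so the row pass can read every length-n row contiguously (out[i * n + j]). A stripped-down sketch of that staging, with the per-size scaling omitted and hypothetical names, reusing the transform_2d assumption from earlier:

/* Sketch only, not the patch itself: generic n (width) x n2 (height) pass. */
static void fht_rect_sketch(const int16_t *input, tran_low_t *output,
                            int stride, int n, int n2,
                            const transform_2d *ht) {
  tran_low_t out[32 * 32], temp_in[32], temp_out[32];
  int i, j;
  for (i = 0; i < n; ++i) {  /* n columns, n2 samples each */
    for (j = 0; j < n2; ++j) temp_in[j] = input[j * stride + i];
    ht->cols(temp_in, temp_out);
    for (j = 0; j < n2; ++j) out[j * n + i] = temp_out[j];  /* raster store */
  }
  for (i = 0; i < n2; ++i) {  /* n2 rows, n samples each */
    for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];    /* contiguous read */
    ht->rows(temp_in, temp_out);
    for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j];
  }
}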
void vp10_fht16x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 16;
const int n2 = 32;
tran_low_t out[32 * 16];
tran_low_t temp_in[32], temp_out[32];
int i, j;
const transform_2d ht = FHT_16x32[tx_type];
int16_t flipped_input[32 * 16];
maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);

// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
out[j * n + i] = temp_out[j];
}

// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
// Note: overall scale factor of transform is 4 times unitary
}

void vp10_fht32x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 16;
const int n2 = 32;
tran_low_t out[32 * 16];
tran_low_t temp_in[32], temp_out[32];
int i, j;
const transform_2d ht = FHT_32x16[tx_type];
int16_t flipped_input[32 * 16];
maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);

// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = temp_out[j];
}

// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
// Note: overall scale factor of transform is 4 times unitary
}

#endif // CONFIG_EXT_TX
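Two rounding idioms appear in the final stores above: the 8x16/16x8 pair uses (x + 1) >> 1, which halves with ties rounded toward +infinity, while the 16x32/32x16 pair uses (x + 1 + (x < 0)) >> 2, which quarters with ties rounded toward zero so that the result is odd-symmetric, f(-x) == -f(x). A hypothetical standalone check (assuming arithmetic right shift on negative values, as the codec does throughout):

#include <assert.h>
static int half_round(int x) { return (x + 1) >> 1; }
static int quarter_round_sym(int x) { return (x + 1 + (x < 0)) >> 2; }
int main(void) {
  assert(half_round(5) == 3 && half_round(-5) == -2);  /* not symmetric */
  assert(quarter_round_sym(6) == 1 && quarter_round_sym(-6) == -1);
  assert(quarter_round_sym(2) == 0 && quarter_round_sym(-2) == 0);
  return 0;
}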
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
@@ -1578,14 +1789,34 @@ void vp10_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
}

#if CONFIG_EXT_TX
void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht4x8_c(input, output, stride, tx_type);
}

void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht8x4_c(input, output, stride, tx_type);
}

void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
vp10_fht4x8_c(input, output, stride, tx_type);
void vp10_highbd_fht8x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht8x16_c(input, output, stride, tx_type);
}

void vp10_highbd_fht16x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht16x8_c(input, output, stride, tx_type);
}

void vp10_highbd_fht16x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht16x32_c(input, output, stride, tx_type);
}

void vp10_highbd_fht32x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht32x16_c(input, output, stride, tx_type);
}
#endif // CONFIG_EXT_TX
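The high-bitdepth C entry points above are thin forwards to the 8-bit C implementations, presumably because the C path already computes in tran_low_t precision. Usage is identical to the lowbd functions; a hypothetical call site:

/* Hypothetical usage; src_diff/coeff sized for an 8-wide, 16-tall block. */
static void example_fht8x16_highbd(const int16_t *src_diff, tran_low_t *coeff) {
  vp10_highbd_fht8x16_c(src_diff, coeff, /*stride=*/8, DCT_DCT);
}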
@@ -55,19 +55,47 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
}

#if CONFIG_EXT_TX
static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void)fwd_txfm_opt;
vp10_fht8x4(src_diff, coeff, diff_stride, tx_type);
}

static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void)fwd_txfm_opt;
(void) fwd_txfm_opt;
vp10_fht4x8(src_diff, coeff, diff_stride, tx_type);
}

static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht8x4(src_diff, coeff, diff_stride, tx_type);
}

static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht8x16(src_diff, coeff, diff_stride, tx_type);
}

static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht16x8(src_diff, coeff, diff_stride, tx_type);
}

static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht16x32(src_diff, coeff, diff_stride, tx_type);
}

static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht32x16(src_diff, coeff, diff_stride, tx_type);
}
#endif // CONFIG_EXT_TX
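Each rectangular wrapper above casts fwd_txfm_opt to void: that option appears to matter only for the square sizes (where it can select a cheaper DC-only forward transform), while the new rectangles have only the full C hybrid transform, so the parameter is accepted for interface uniformity and the cast merely silences the unused-parameter warning.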
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
@@ -213,21 +241,53 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
}

#if CONFIG_EXT_TX
static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
(void)bd;
vp10_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type);
}

static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
(void)bd;
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type);
}

static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type);
}

static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht8x16(src_diff, coeff, diff_stride, tx_type);
}

static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht16x8(src_diff, coeff, diff_stride, tx_type);
}

static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht16x32(src_diff, coeff, diff_stride, tx_type);
}

static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht32x16(src_diff, coeff, diff_stride, tx_type);
}
#endif // CONFIG_EXT_TX

static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
@@ -361,6 +421,18 @@ void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
case TX_8X4:
fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
case TX_8X16:
fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
case TX_16X8:
fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
case TX_16X32:
fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
case TX_32X16:
fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
#endif // CONFIG_EXT_TX
case TX_4X4:
fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
@@ -400,11 +472,30 @@ void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
bd);
break;
case TX_8X16:
highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
break;
case TX_16X8:
highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
break;
case TX_16X32:
highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
break;
case TX_32X16:
highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
break;
#endif // CONFIG_EXT_TX
case TX_4X4:
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type,
lossless, bd);
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -571,18 +571,6 @@ static void get_entropy_contexts_plane(
memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
break;
#if CONFIG_EXT_TX
case TX_4X8:
memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
for (i = 0; i < num_4x4_h; i += 2)
t_left[i] = !!*(const uint16_t *)&left[i];
break;
case TX_8X4:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
break;
#endif // CONFIG_EXT_TX
case TX_8X8:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
@@ -601,7 +589,45 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
default: assert(0 && "Invalid transform size."); break;
#if CONFIG_EXT_TX
case TX_4X8:
memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
for (i = 0; i < num_4x4_h; i += 2)
t_left[i] = !!*(const uint16_t *)&left[i];
break;
case TX_8X4:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
break;
case TX_8X16:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
case TX_16X8:
for (i = 0; i < num_4x4_w; i += 4)
t_above[i] = !!*(const uint32_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 2)
t_left[i] = !!*(const uint16_t *)&left[i];
break;
case TX_16X32:
for (i = 0; i < num_4x4_w; i += 4)
t_above[i] = !!*(const uint32_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
case TX_32X16:
for (i = 0; i < num_4x4_w; i += 8)
t_above[i] = !!*(const uint64_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
#endif // CONFIG_EXT_TX
default:
assert(0 && "Invalid transform size.");
break;
}
}
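In the new entropy-context cases above, the loop step and load width track the two block dimensions independently: TX_8X16, for instance, covers 8 pixels above (two 4-wide contexts, collapsed with one uint16_t load) but 16 pixels to the left (four contexts, one uint32_t load). Each wide load is simply an "any nonzero?" test over adjacent ENTROPY_CONTEXT bytes; a hypothetical standalone illustration, relying on the same aliasing/alignment assumptions the codec makes:

#include <assert.h>
#include <stdint.h>
int main(void) {
  uint8_t left[4] = { 0, 0, 3, 0 };   /* stand-in for ENTROPY_CONTEXT[4] */
  /* Nonzero iff any of the four bytes is set, as in t_left[i] above. */
  int any = !!*(const uint32_t *)&left[0];
  assert(any == 1);
  return 0;
}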
@@ -1107,6 +1107,22 @@ static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
sse = vpx_sum_squares_2d_i16(diff, diff_stride, 4) +
vpx_sum_squares_2d_i16(diff + 4, diff_stride, 4);
break;
case TX_8X16:
sse = vpx_sum_squares_2d_i16(diff, diff_stride, 8) +
vpx_sum_squares_2d_i16(diff + 8 * diff_stride, diff_stride, 8);
break;
case TX_16X8:
sse = vpx_sum_squares_2d_i16(diff, diff_stride, 8) +
vpx_sum_squares_2d_i16(diff + 8, diff_stride, 8);
break;
case TX_16X32:
sse = vpx_sum_squares_2d_i16(diff, diff_stride, 16) +
vpx_sum_squares_2d_i16(diff + 16 * diff_stride, diff_stride, 16);
break;
case TX_32X16:
sse = vpx_sum_squares_2d_i16(diff, diff_stride, 16) +
vpx_sum_squares_2d_i16(diff + 16, diff_stride, 16);
break;
#endif // CONFIG_EXT_TX
default:
assert(tx_size < TX_SIZES);
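Since vpx_sum_squares_2d_i16 handles square regions, each rectangular SSE above is computed as two square halves: the second call offsets by 8 (or 16) columns for the wide shapes and by 8 * diff_stride (or 16 * diff_stride) rows for the tall ones. A hypothetical check that the split covers every sample exactly once, using a scalar stand-in for vpx_sum_squares_2d_i16:

#include <assert.h>
#include <stdint.h>
static uint64_t sum_sq(const int16_t *d, int stride, int size) {
  uint64_t s = 0;
  for (int r = 0; r < size; ++r)
    for (int c = 0; c < size; ++c)
      s += (uint64_t)((int32_t)d[r * stride + c] * d[r * stride + c]);
  return s;
}
int main(void) {
  int16_t diff[8 * 16];  /* 8 wide, 16 tall, stride 8 (TX_8X16 layout) */
  uint64_t whole = 0;
  for (int i = 0; i < 8 * 16; ++i) {
    diff[i] = (int16_t)(i - 64);
    whole += (uint64_t)((int32_t)diff[i] * diff[i]);
  }
  /* top 8x8 half plus bottom 8x8 half, as in the TX_8X16 case above */
  assert(whole == sum_sq(diff, 8, 8) + sum_sq(diff + 8 * 8, 8, 8));
  return 0;
}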