Make rectangular transform block available in the common lib

This prepares the integration of rectangular transform block sizes
with the recursive transform block partition system.

Change-Id: Id96aa3790dace15619c665f438241938992d1730
This commit is contained in:
Jingning Han
2016-11-01 18:19:30 -07:00
parent aad298ffcf
commit ec419e0771
6 changed files with 262 additions and 126 deletions

View File

@@ -61,25 +61,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add/;
if (aom_config("CONFIG_EXT_TX") eq "yes") {
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x8_32_add/;
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x4_32_add/;
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x16_128_add/;
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x8_128_add/;
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x32_512_add/;
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht32x16_512_add/;
}
add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x8_64_add/;
@@ -90,25 +88,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add sse2/;
if (aom_config("CONFIG_EXT_TX") eq "yes") {
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x8_32_add sse2/;
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x4_32_add sse2/;
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x16_128_add sse2/;
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x8_128_add sse2/;
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x32_512_add sse2/;
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht32x16_512_add sse2/;
}
add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x8_64_add sse2/;
@@ -122,25 +118,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add/;
if (aom_config("CONFIG_EXT_TX") eq "yes") {
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x8_32_add/;
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x4_32_add/;
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x16_128_add/;
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x8_128_add/;
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x32_512_add/;
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht32x16_512_add/;
}
add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x8_64_add/;
@@ -151,25 +145,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add sse2 neon dspr2/;
if (aom_config("CONFIG_EXT_TX") eq "yes") {
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x8_32_add sse2/;
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x4_32_add sse2/;
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x16_128_add sse2/;
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x8_128_add sse2/;
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht16x32_512_add sse2/;
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht32x16_512_add sse2/;
}
add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht8x8_64_add sse2 neon dspr2/;
@@ -283,25 +275,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht4x4_16_add/;
if (aom_config("CONFIG_EXT_TX") eq "yes") {
add_proto qw/void av1_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
add_proto qw/void av1_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht4x8_32_add/;
add_proto qw/void av1_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
add_proto qw/void av1_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht8x4_32_add/;
add_proto qw/void av1_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
add_proto qw/void av1_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht8x16_128_add/;
add_proto qw/void av1_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
add_proto qw/void av1_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht16x8_128_add/;
add_proto qw/void av1_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
add_proto qw/void av1_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht16x32_512_add/;
add_proto qw/void av1_highbd_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
add_proto qw/void av1_highbd_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht32x16_512_add/;
}
add_proto qw/void av1_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
specialize qw/av1_highbd_iht8x8_64_add/;

View File

@@ -58,6 +58,7 @@ static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
#endif // CONFIG_EXT_TX
// For use in lieu of ADST
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
@@ -75,6 +76,7 @@ static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
}
#if CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_EXT_TX
static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
int bd) {
int i;
@@ -120,9 +122,11 @@ static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
aom_highbd_idct16_c(inputhalf, output + 16, bd);
// Note overall scaling factor is 4 times orthogonal
}
#endif // CONFIG_EXT_TX
#endif // CONFIG_AOM_HIGHBITDEPTH
// Inverse identity transform and add.
#if CONFIG_EXT_TX
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int bs, int tx_type) {
int r, c;
@@ -136,6 +140,7 @@ static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
}
#endif // CONFIG_EXT_TX
#define FLIPUD_PTR(dest, stride, size) \
do { \
@@ -143,6 +148,7 @@ static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
(stride) = -(stride); \
} while (0)
#if CONFIG_EXT_TX
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
int *sstride, int tx_type, int sizey,
int sizex) {
@@ -180,8 +186,10 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
default: assert(0); break;
}
}
#endif // CONFIG_EXT_TX
#if CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_EXT_TX
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bs, int tx_type, int bd) {
int r, c;
@@ -235,8 +243,8 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
default: assert(0); break;
}
}
#endif // CONFIG_AOM_HIGHBITDEPTH
#endif // CONFIG_EXT_TX
#endif // CONFIG_AOM_HIGHBITDEPTH
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -301,7 +309,6 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
#if CONFIG_EXT_TX
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_4x8[] = {
@@ -309,6 +316,7 @@ void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst8_c, aom_idct4_c }, // ADST_DCT
{ aom_idct8_c, aom_iadst4_c }, // DCT_ADST
{ aom_iadst8_c, aom_iadst4_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ aom_iadst8_c, aom_idct4_c }, // FLIPADST_DCT
{ aom_idct8_c, aom_iadst4_c }, // DCT_FLIPADST
{ aom_iadst8_c, aom_iadst4_c }, // FLIPADST_FLIPADST
@@ -321,6 +329,7 @@ void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx8_c, aom_iadst4_c }, // H_ADST
{ aom_iadst8_c, iidtx4_c }, // V_FLIPADST
{ iidtx8_c, aom_iadst4_c }, // H_FLIPADST
#endif
};
const int n = 4;
@@ -343,7 +352,9 @@ void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_4x8[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif
// Sum with the destination
for (i = 0; i < n2; ++i) {
@@ -362,6 +373,7 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst4_c, aom_idct8_c }, // ADST_DCT
{ aom_idct4_c, aom_iadst8_c }, // DCT_ADST
{ aom_iadst4_c, aom_iadst8_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ aom_iadst4_c, aom_idct8_c }, // FLIPADST_DCT
{ aom_idct4_c, aom_iadst8_c }, // DCT_FLIPADST
{ aom_iadst4_c, aom_iadst8_c }, // FLIPADST_FLIPADST
@@ -374,6 +386,7 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx4_c, aom_iadst8_c }, // H_ADST
{ aom_iadst4_c, iidtx8_c }, // V_FLIPADST
{ iidtx4_c, aom_iadst8_c }, // H_FLIPADST
#endif
};
const int n = 4;
const int n2 = 8;
@@ -396,7 +409,9 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_8x4[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif
// Sum with the destination
for (i = 0; i < n; ++i) {
@@ -415,6 +430,7 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst16_c, aom_idct8_c }, // ADST_DCT
{ aom_idct16_c, aom_iadst8_c }, // DCT_ADST
{ aom_iadst16_c, aom_iadst8_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ aom_iadst16_c, aom_idct8_c }, // FLIPADST_DCT
{ aom_idct16_c, aom_iadst8_c }, // DCT_FLIPADST
{ aom_iadst16_c, aom_iadst8_c }, // FLIPADST_FLIPADST
@@ -427,6 +443,7 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx16_c, aom_iadst8_c }, // H_ADST
{ aom_iadst16_c, iidtx8_c }, // V_FLIPADST
{ iidtx16_c, aom_iadst8_c }, // H_FLIPADST
#endif
};
const int n = 8;
@@ -449,7 +466,9 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_8x16[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif
// Sum with the destination
for (i = 0; i < n2; ++i) {
@@ -468,6 +487,7 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst8_c, aom_idct16_c }, // ADST_DCT
{ aom_idct8_c, aom_iadst16_c }, // DCT_ADST
{ aom_iadst8_c, aom_iadst16_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ aom_iadst8_c, aom_idct16_c }, // FLIPADST_DCT
{ aom_idct8_c, aom_iadst16_c }, // DCT_FLIPADST
{ aom_iadst8_c, aom_iadst16_c }, // FLIPADST_FLIPADST
@@ -480,6 +500,7 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx8_c, aom_iadst16_c }, // H_ADST
{ aom_iadst8_c, iidtx16_c }, // V_FLIPADST
{ iidtx8_c, aom_iadst16_c }, // H_FLIPADST
#endif
};
const int n = 8;
const int n2 = 16;
@@ -502,7 +523,9 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_16x8[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif
// Sum with the destination
for (i = 0; i < n; ++i) {
@@ -521,6 +544,7 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ ihalfright32_c, aom_idct16_c }, // ADST_DCT
{ aom_idct32_c, aom_iadst16_c }, // DCT_ADST
{ ihalfright32_c, aom_iadst16_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ ihalfright32_c, aom_idct16_c }, // FLIPADST_DCT
{ aom_idct32_c, aom_iadst16_c }, // DCT_FLIPADST
{ ihalfright32_c, aom_iadst16_c }, // FLIPADST_FLIPADST
@@ -533,6 +557,7 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx32_c, aom_iadst16_c }, // H_ADST
{ ihalfright32_c, iidtx16_c }, // V_FLIPADST
{ iidtx32_c, aom_iadst16_c }, // H_FLIPADST
#endif
};
const int n = 16;
@@ -555,7 +580,9 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_16x32[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif
// Sum with the destination
for (i = 0; i < n2; ++i) {
@@ -574,6 +601,7 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst16_c, aom_idct32_c }, // ADST_DCT
{ aom_idct16_c, ihalfright32_c }, // DCT_ADST
{ aom_iadst16_c, ihalfright32_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ aom_iadst16_c, aom_idct32_c }, // FLIPADST_DCT
{ aom_idct16_c, ihalfright32_c }, // DCT_FLIPADST
{ aom_iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST
@@ -586,6 +614,7 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx16_c, ihalfright32_c }, // H_ADST
{ aom_iadst16_c, iidtx32_c }, // V_FLIPADST
{ iidtx16_c, ihalfright32_c }, // H_FLIPADST
#endif
};
const int n = 16;
const int n2 = 32;
@@ -608,7 +637,9 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_32x16[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif
// Sum with the destination
for (i = 0; i < n; ++i) {
@@ -619,7 +650,6 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
}
#endif // CONFIG_EXT_TX
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -905,7 +935,6 @@ void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
}
}
#if CONFIG_EXT_TX
void av1_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type) {
(void)eob;
@@ -941,7 +970,6 @@ void av1_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, int stride,
(void)eob;
av1_iht32x16_512_add(input, dest, stride, tx_type);
}
#endif // CONFIG_EXT_TX
void av1_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type) {
@@ -1909,7 +1937,6 @@ void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
av1_inv_txfm_add_16x16(input, dest, stride, eob, tx_type);
break;
case TX_8X8: av1_inv_txfm_add_8x8(input, dest, stride, eob, tx_type); break;
#if CONFIG_EXT_TX
case TX_4X8: av1_inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
case TX_8X4: av1_inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
case TX_8X16:
@@ -1924,7 +1951,6 @@ void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
case TX_32X16:
av1_inv_txfm_add_32x16(input, dest, stride, eob, tx_type);
break;
#endif // CONFIG_EXT_TX
case TX_4X4:
// this is like av1_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless

View File

@@ -67,12 +67,10 @@ void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type, int lossless);
#if CONFIG_EXT_TX
// Entry points for the 8x4 and 4x8 rectangular transform sizes. Both take the
// coefficient buffer `input`, the destination pixel buffer `dest` with its
// `stride`, an `eob` value and the transform type `tx_type`.
// NOTE(review): the visible definition of av1_inv_txfm_add_4x8 starts with
// `(void)eob;`, so eob looks unused there -- confirm for the 8x4 variant.
void av1_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type);
void av1_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type);
#endif // CONFIG_EXT_TX
void av1_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type);
void av1_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
@@ -95,12 +93,10 @@ void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type,
int lossless);
#if CONFIG_EXT_TX
// `highbd` variants of the 4x8 / 8x4 entry points. Same argument list as the
// non-highbd prototypes plus an extra `bd` parameter placed before `tx_type`.
// NOTE(review): `bd` is presumably the sample bit depth -- confirm against the
// definitions, which are not visible here.
void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type);
void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type);
#endif // CONFIG_EXT_TX
void av1_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type);
void av1_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,

View File

@@ -36,12 +36,12 @@ DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = {
0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15,
};
#if CONFIG_EXT_TX
// 32-entry scan table for the 4x8 size. The values are a permutation of
// 0..31; entries 18..31 are in identity order.
// NOTE(review): presumably maps scan position -> coefficient index; confirm
// against the code that consumes the SCAN_ORDER tables.
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
0, 1, 4, 5, 2, 8, 6, 9, 10, 3, 12, 7, 13, 11, 14, 16,
17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
@@ -51,12 +51,14 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
#endif
// 32-entry scan table for the 8x4 size. The values are a permutation of
// 0..31.
// NOTE(review): presumably maps scan position -> coefficient index; confirm
// against the code that consumes the SCAN_ORDER tables.
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
0, 1, 8, 9, 2, 16, 10, 17, 18, 3, 24, 11, 25, 19, 26, 4,
12, 27, 20, 5, 28, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
@@ -66,7 +68,7 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
#endif // CONFIG_EXT_TX
#endif
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26,
@@ -105,7 +107,6 @@ DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = {
58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32,
5, 12, 19, 26, 33, 40, 6, 13, 20, 27, 34, 41, 48, 7, 14,
@@ -129,6 +130,7 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = {
122, 63, 78, 93, 108, 123, 79, 94, 109, 124, 95, 110, 125, 111, 126, 127,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = {
0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
@@ -174,6 +176,7 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x8[128]) = {
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127,
};
#endif
DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64,
@@ -251,6 +254,7 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = {
510, 511,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = {
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
@@ -1034,7 +1038,6 @@ DECLARE_ALIGNED(16, static const int16_t,
8, 3, 6, 8, 9, 6, 9, 9, 12, 7, 10, 10, 13, 11, 14, 0, 0,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6,
@@ -1043,6 +1046,7 @@ DECLARE_ALIGNED(16, static const int16_t,
24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 0,
@@ -1058,6 +1062,7 @@ DECLARE_ALIGNED(16, static const int16_t,
13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
};
#endif
DECLARE_ALIGNED(16, static const int16_t,
default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
@@ -1067,6 +1072,7 @@ DECLARE_ALIGNED(16, static const int16_t,
13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 8, 8, 16, 16, 0, 0, 1, 8, 9, 16, 17, 24, 1,
@@ -1141,7 +1147,6 @@ DECLARE_ALIGNED(16, static const int16_t,
31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
default_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 8, 8, 2, 2, 2,
@@ -1186,6 +1191,7 @@ DECLARE_ALIGNED(16, static const int16_t,
126, 0, 0
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48,
@@ -1271,6 +1277,7 @@ DECLARE_ALIGNED(16, static const int16_t,
104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
126, 0, 0
};
#endif
DECLARE_ALIGNED(16, static const int16_t,
default_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
@@ -1418,6 +1425,7 @@ DECLARE_ALIGNED(16, static const int16_t,
478, 509, 479, 510, 0, 0
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
@@ -2841,12 +2849,12 @@ DECLARE_ALIGNED(16, static const int16_t, av1_row_iscan_4x4[16]) = {
0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15,
};
#if CONFIG_EXT_TX
// Inverse permutation of default_scan_4x8:
// av1_default_iscan_4x8[default_scan_4x8[i]] == i for every i in 0..31.
// Keep the two tables in sync when either one is edited.
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x8[32]) = {
0, 1, 4, 9, 2, 3, 6, 11, 5, 7, 8, 13, 10, 12, 14, 17,
15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = {
0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
@@ -2856,12 +2864,14 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x8[32]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};
#endif
// Inverse permutation of default_scan_8x4:
// av1_default_iscan_8x4[default_scan_8x4[i]] == i for every i in 0..31.
// Keep the two tables in sync when either one is edited.
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x4[32]) = {
0, 1, 4, 9, 15, 19, 24, 28, 2, 3, 6, 11, 16, 21, 25, 29,
5, 7, 8, 13, 18, 22, 26, 30, 10, 12, 14, 17, 20, 23, 27, 31,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x4[32]) = {
0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
@@ -2910,7 +2920,6 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x8[64]) = {
25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = {
0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29, 36,
5, 8, 12, 17, 23, 30, 37, 44, 9, 13, 18, 24, 31, 38, 45, 52,
@@ -2933,6 +2942,7 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x8[128]) = {
35, 43, 51, 59, 67, 75, 83, 91, 99, 106, 112, 117, 121, 124, 126, 127,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x16[128]) = {
0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113,
2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115,
@@ -2978,6 +2988,7 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x8[128]) = {
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127,
};
#endif
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = {
0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105,
@@ -3055,6 +3066,7 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x16[512]) = {
510, 511,
};
#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x32[512]) = {
0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
@@ -3810,7 +3822,6 @@ const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
{ default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors },
};
#if CONFIG_EXT_TX
const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
#if CONFIG_CB4X4
{
@@ -3819,6 +3830,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
{ col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
#if CONFIG_EXT_TX
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3831,6 +3843,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
{ row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
{ col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
#endif // CONFIG_EXT_TX
},
#endif
{
@@ -3839,6 +3852,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
{ col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
#if CONFIG_EXT_TX
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3851,6 +3865,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
{ row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
{ col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_8X8
@@ -3858,6 +3873,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
{ col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
#if CONFIG_EXT_TX
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
@@ -3870,6 +3886,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
{ row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
{ col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_16X16
@@ -3879,6 +3896,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
#if CONFIG_EXT_TX
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
@@ -3896,11 +3914,13 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
{ row_scan_16x16, av1_row_iscan_16x16, row_scan_16x16_neighbors },
{ col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_32X32
{ default_scan_32x32, av1_default_iscan_32x32,
default_scan_32x32_neighbors },
#if CONFIG_EXT_TX
{ h2_scan_32x32, av1_h2_iscan_32x32, h2_scan_32x32_neighbors },
{ v2_scan_32x32, av1_v2_iscan_32x32, v2_scan_32x32_neighbors },
{ qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
@@ -3916,6 +3936,7 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
{ mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
#endif // CONFIG_EXT_TX
}
};
@@ -3927,6 +3948,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
#if CONFIG_EXT_TX
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3939,6 +3961,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
{ mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
#endif // CONFIG_EXT_TX
},
#endif
{
@@ -3947,6 +3970,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
#if CONFIG_EXT_TX
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3959,6 +3983,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
{ mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_8X8
@@ -3966,6 +3991,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
#if CONFIG_EXT_TX
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
@@ -3978,6 +4004,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
{ mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
{ mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_16X16
@@ -3989,6 +4016,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
#if CONFIG_EXT_TX
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
@@ -4006,11 +4034,13 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
{ mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
{ mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_32X32
{ default_scan_32x32, av1_default_iscan_32x32,
default_scan_32x32_neighbors },
#if CONFIG_EXT_TX
{ h2_scan_32x32, av1_h2_iscan_32x32, h2_scan_32x32_neighbors },
{ v2_scan_32x32, av1_v2_iscan_32x32, v2_scan_32x32_neighbors },
{ qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
@@ -4026,6 +4056,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
{ mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_4X8
@@ -4033,6 +4064,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
#if CONFIG_EXT_TX
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
@@ -4045,6 +4077,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
{ mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
{ mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_8X4
@@ -4052,6 +4085,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
#if CONFIG_EXT_TX
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
@@ -4064,6 +4098,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
{ mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
{ mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_8X16
@@ -4075,6 +4110,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_8x16_neighbors },
{ default_scan_8x16, av1_default_iscan_8x16,
default_scan_8x16_neighbors },
#if CONFIG_EXT_TX
{ default_scan_8x16, av1_default_iscan_8x16,
default_scan_8x16_neighbors },
{ default_scan_8x16, av1_default_iscan_8x16,
@@ -4092,6 +4128,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
{ mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
{ mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_16X8
@@ -4103,6 +4140,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x8_neighbors },
{ default_scan_16x8, av1_default_iscan_16x8,
default_scan_16x8_neighbors },
#if CONFIG_EXT_TX
{ default_scan_16x8, av1_default_iscan_16x8,
default_scan_16x8_neighbors },
{ default_scan_16x8, av1_default_iscan_16x8,
@@ -4120,6 +4158,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
{ mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
{ mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_16X32
@@ -4131,6 +4170,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x32_neighbors },
{ default_scan_16x32, av1_default_iscan_16x32,
default_scan_16x32_neighbors },
#if CONFIG_EXT_TX
{ default_scan_16x32, av1_default_iscan_16x32,
default_scan_16x32_neighbors },
{ default_scan_16x32, av1_default_iscan_16x32,
@@ -4148,6 +4188,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
{ mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
{ mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
#endif // CONFIG_EXT_TX
},
{
// TX_32X16
@@ -4159,6 +4200,7 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_32x16_neighbors },
{ default_scan_32x16, av1_default_iscan_32x16,
default_scan_32x16_neighbors },
#if CONFIG_EXT_TX
{ default_scan_32x16, av1_default_iscan_32x16,
default_scan_32x16_neighbors },
{ default_scan_32x16, av1_default_iscan_32x16,
@@ -4176,49 +4218,9 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
{ mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
}
};
#else // CONFIG_EXT_TX
// Intra-block scan orders, looked up as [tx_size][tx_type]. Every entry lists
// a scan table, the matching av1_*_iscan_* table and the *_neighbors table.
// Each transform size supplies four tx_type entries: the first and last use
// the default scan while the two in between use the row and column scans.
// TX_32X32 is the exception: all four of its entries use the default scan.
const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
#if CONFIG_CB4X4
  {
      // TX_2X2 (filled with the 4x4 tables)
      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
      { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
      { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }
  },
#endif
  {
      // TX_4X4
      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
      { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
      { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors }
  },
  {
      // TX_8X8
      { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
      { row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
      { col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
      { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors }
  },
  {
      // TX_16X16
      { default_scan_16x16, av1_default_iscan_16x16,
        default_scan_16x16_neighbors },
      { row_scan_16x16, av1_row_iscan_16x16, row_scan_16x16_neighbors },
      { col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
      { default_scan_16x16, av1_default_iscan_16x16,
        default_scan_16x16_neighbors }
  },
  {
      // TX_32X32: no row/column variants, default scan for every tx_type
      { default_scan_32x32, av1_default_iscan_32x32,
        default_scan_32x32_neighbors },
      { default_scan_32x32, av1_default_iscan_32x32,
        default_scan_32x32_neighbors },
      { default_scan_32x32, av1_default_iscan_32x32,
        default_scan_32x32_neighbors },
      { default_scan_32x32, av1_default_iscan_32x32,
        default_scan_32x32_neighbors },
  }
};
#endif // CONFIG_EXT_TX
}
};
#if CONFIG_ADAPT_SCAN
// TX_32X32 has 1024 coefficients whose indexes can be represented in 10

View File

@@ -27,6 +27,7 @@ extern "C" {
// Scan-order lookup tables, declared extern here.
// Default table: one SCAN_ORDER per TX_SIZES index.
extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
// Intra table, indexed [tx_size][tx_type]; first dimension is TX_SIZES.
extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES];
// Inter table, indexed [tx_size][tx_type]; first dimension is TX_SIZES_ALL
// rather than TX_SIZES.
extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
#if CONFIG_ADAPT_SCAN
void av1_update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
@@ -87,7 +88,7 @@ static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
return &cm->fc->sc[tx_size][tx_type];
#else // CONFIG_ADAPT_SCAN
(void)cm;
#if CONFIG_EXT_TX
#if CONFIG_EXT_TX || CONFIG_VAR_TX
return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
: &av1_intra_scan_orders[tx_size][tx_type];
#else

View File

@@ -543,6 +543,7 @@ static INLINE void flip_buffer_lr_8x8(__m128i *in) {
in[6] = mm_reverse_epi16(in[6]);
in[7] = mm_reverse_epi16(in[7]);
}
#endif // CONFIG_EXT_TX
static INLINE void scale_sqrt2_8x4(__m128i *in) {
// Implements 'ROUND_POWER_OF_TWO(input * Sqrt2, DCT_CONST_BITS)'
@@ -665,8 +666,10 @@ void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case H_DCT:
#endif
aom_idct8_sse2(in);
array_transpose_8x8(in, in);
aom_idct8_sse2(in + 8);
@@ -674,17 +677,20 @@ void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
break;
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
case H_ADST:
case H_FLIPADST:
#endif
aom_iadst8_sse2(in);
array_transpose_8x8(in, in);
aom_iadst8_sse2(in + 8);
array_transpose_8x8(in + 8, in + 8);
break;
#if CONFIG_EXT_TX
case V_FLIPADST:
case V_ADST:
case V_DCT:
@@ -692,6 +698,7 @@ void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
iidtx8_sse2(in);
iidtx8_sse2(in + 8);
break;
#endif
default: assert(0); break;
}
scale_sqrt2_8x8(in);
@@ -701,33 +708,50 @@ void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case DCT_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case V_DCT: idct16_8col(in); break;
case V_DCT:
#endif
idct16_8col(in);
break;
case ADST_DCT:
case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_ADST:
case ADST_FLIPADST:
case FLIPADST_FLIPADST:
case FLIPADST_DCT:
case V_ADST:
case V_FLIPADST: iadst16_8col(in); break;
case V_FLIPADST:
#endif
iadst16_8col(in);
break;
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case H_FLIPADST:
case IDTX: iidtx16_8col(in); break;
#endif
default: assert(0); break;
}
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case H_DCT:
#endif
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case H_ADST:
case V_ADST:
case V_DCT:
case IDTX: write_buffer_8x16(dest, in, stride); break;
case IDTX:
#endif
write_buffer_8x16(dest, in, stride);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST: write_buffer_8x16(dest + stride * 15, in, -stride); break;
@@ -743,6 +767,7 @@ void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
flip_buffer_lr_8x8(in + 8);
write_buffer_8x16(dest + stride * 15, in, -stride);
break;
#endif
default: assert(0); break;
}
}
@@ -809,20 +834,30 @@ void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case H_DCT: idct16_8col(in); break;
case H_DCT:
#endif
idct16_8col(in);
break;
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
case H_ADST:
case H_FLIPADST: iadst16_8col(in); break;
case H_FLIPADST:
#endif
iadst16_8col(in);
break;
#if CONFIG_EXT_TX
case V_FLIPADST:
case V_ADST:
case V_DCT:
case IDTX: iidtx16_8col(in); break;
#endif
default: assert(0); break;
}
@@ -834,22 +869,27 @@ void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case DCT_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case V_DCT:
#endif
aom_idct8_sse2(in);
aom_idct8_sse2(in + 8);
break;
case ADST_DCT:
case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_ADST:
case ADST_FLIPADST:
case FLIPADST_FLIPADST:
case FLIPADST_DCT:
case V_ADST:
case V_FLIPADST:
#endif
aom_iadst8_sse2(in);
aom_iadst8_sse2(in + 8);
break;
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case H_FLIPADST:
@@ -859,22 +899,26 @@ void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest,
iidtx8_sse2(in);
iidtx8_sse2(in + 8);
break;
#endif
default: assert(0); break;
}
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case H_DCT:
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case V_ADST:
case V_DCT:
case IDTX:
#endif
write_buffer_8x8_round6(dest, in, stride);
write_buffer_8x8_round6(dest + 8, in + 8, stride);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST:
@@ -895,6 +939,7 @@ void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest,
write_buffer_8x8_round6(dest + stride * 7, in + 8, -stride);
write_buffer_8x8_round6(dest + stride * 7 + 8, in, -stride);
break;
#endif
default: assert(0); break;
}
}
@@ -933,10 +978,15 @@ void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case H_DCT: aom_idct8_sse2(in); break;
case H_DCT:
#endif
aom_idct8_sse2(in);
break;
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
@@ -946,9 +996,8 @@ void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
case V_FLIPADST:
case V_ADST:
case V_DCT:
case IDTX:
iidtx8_sse2(in);
array_transpose_8x8(in, in);
case IDTX: iidtx8_sse2(in); array_transpose_8x8(in, in);
#endif
break;
default: assert(0); break;
}
@@ -967,22 +1016,27 @@ void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case DCT_DCT:
case DCT_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case V_DCT:
#endif
aom_idct4_sse2(in + 4);
aom_idct4_sse2(in + 6);
break;
case ADST_DCT:
case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_ADST:
case ADST_FLIPADST:
case FLIPADST_FLIPADST:
case FLIPADST_DCT:
case V_ADST:
case V_FLIPADST:
#endif
aom_iadst4_sse2(in + 4);
aom_iadst4_sse2(in + 6);
break;
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case H_FLIPADST:
@@ -992,6 +1046,7 @@ void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
iidtx4_sse2(in + 6);
array_transpose_4x4(in + 6);
break;
#endif
default: assert(0); break;
}
@@ -1004,9 +1059,10 @@ void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case H_DCT:
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case V_ADST:
case V_DCT:
@@ -1028,6 +1084,7 @@ void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[2] = mm_reverse_epi16(in[2]);
in[3] = mm_reverse_epi16(in[3]);
FLIPUD_PTR(dest, stride, 4);
#endif
break;
default: assert(0); break;
}
@@ -1111,22 +1168,27 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case H_DCT:
#endif
aom_idct4_sse2(in + 4);
aom_idct4_sse2(in + 6);
break;
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
case H_ADST:
case H_FLIPADST:
#endif
aom_iadst4_sse2(in + 4);
aom_iadst4_sse2(in + 6);
break;
#if CONFIG_EXT_TX
case V_FLIPADST:
case V_ADST:
case V_DCT:
@@ -1136,6 +1198,7 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
iidtx4_sse2(in + 6);
array_transpose_4x4(in + 6);
break;
#endif
default: assert(0); break;
}
@@ -1149,16 +1212,25 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
switch (tx_type) {
case DCT_DCT:
case DCT_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case V_DCT: aom_idct8_sse2(in); break;
case V_DCT:
#endif
aom_idct8_sse2(in);
break;
case ADST_DCT:
case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_ADST:
case ADST_FLIPADST:
case FLIPADST_FLIPADST:
case FLIPADST_DCT:
case V_ADST:
case V_FLIPADST: aom_iadst8_sse2(in); break;
case V_FLIPADST:
#endif
aom_iadst8_sse2(in);
break;
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case H_FLIPADST:
@@ -1166,19 +1238,24 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
iidtx8_sse2(in);
array_transpose_8x8(in, in);
break;
#endif
default: assert(0); break;
}
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case H_DCT:
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case V_ADST:
case V_DCT:
case IDTX: break;
case IDTX:
#endif
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST: FLIPUD_PTR(dest, stride, 8); break;
@@ -1205,6 +1282,7 @@ void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[7] = _mm_shufflelo_epi16(in[7], 0x1b);
FLIPUD_PTR(dest, stride, 8);
break;
#endif
default: assert(0); break;
}
in[0] = _mm_unpacklo_epi64(in[0], in[1]);
@@ -1255,6 +1333,7 @@ static INLINE void ihalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
aom_idct16_sse2(bl, br); // Includes a transposition
}
#if CONFIG_EXT_TX
static INLINE void iidtx32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
__m128i *br) {
int i;
@@ -1267,6 +1346,7 @@ static INLINE void iidtx32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
br[i] = _mm_slli_epi16(br[i], 2);
}
}
#endif // CONFIG_EXT_TX
static INLINE void write_buffer_16x32_round6(uint8_t *dest, __m128i *intl,
__m128i *intr, __m128i *inbl,
@@ -1307,22 +1387,27 @@ void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case H_DCT:
#endif
aom_idct16_sse2(intl, intr);
aom_idct16_sse2(inbl, inbr);
break;
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
case H_ADST:
case H_FLIPADST:
#endif
aom_iadst16_sse2(intl, intr);
aom_iadst16_sse2(inbl, inbr);
break;
#if CONFIG_EXT_TX
case V_FLIPADST:
case V_ADST:
case V_DCT:
@@ -1330,6 +1415,7 @@ void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
iidtx16_sse2(intl, intr);
iidtx16_sse2(inbl, inbr);
break;
#endif
default: assert(0); break;
}
@@ -1342,33 +1428,47 @@ void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case DCT_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case V_DCT: idct32_16col(intl, intr, inbl, inbr); break;
case V_DCT:
#endif
idct32_16col(intl, intr, inbl, inbr);
break;
case ADST_DCT:
case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_ADST:
case ADST_FLIPADST:
case FLIPADST_FLIPADST:
case FLIPADST_DCT:
case V_ADST:
case V_FLIPADST: ihalfright32_16col(intl, intr, inbl, inbr); break;
case V_FLIPADST:
#endif
ihalfright32_16col(intl, intr, inbl, inbr);
break;
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case H_FLIPADST:
case IDTX: iidtx32_16col(intl, intr, inbl, inbr); break;
#endif
default: assert(0); break;
}
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case H_DCT:
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case V_ADST:
case V_DCT:
case IDTX: break;
case IDTX:
#endif
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST: FLIPUD_PTR(dest, stride, 32); break;
@@ -1395,6 +1495,7 @@ void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
}
FLIPUD_PTR(dest, stride, 32);
break;
#endif
default: assert(0); break;
}
write_buffer_16x32_round6(dest, intl, intr, inbl, inbr, stride);
@@ -1439,20 +1540,30 @@ void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case H_DCT: idct32_16col(in0, in1, in2, in3); break;
case H_DCT:
#endif
idct32_16col(in0, in1, in2, in3);
break;
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
case H_ADST:
case H_FLIPADST: ihalfright32_16col(in0, in1, in2, in3); break;
case H_FLIPADST:
#endif
ihalfright32_16col(in0, in1, in2, in3);
break;
#if CONFIG_EXT_TX
case V_FLIPADST:
case V_ADST:
case V_DCT:
case IDTX: iidtx32_16col(in0, in1, in2, in3); break;
#endif
default: assert(0); break;
}
@@ -1465,22 +1576,27 @@ void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest,
switch (tx_type) {
case DCT_DCT:
case DCT_ADST:
#if CONFIG_EXT_TX
case DCT_FLIPADST:
case V_DCT:
#endif
aom_idct16_sse2(in0, in1);
aom_idct16_sse2(in2, in3);
break;
case ADST_DCT:
case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_ADST:
case ADST_FLIPADST:
case FLIPADST_FLIPADST:
case FLIPADST_DCT:
case V_ADST:
case V_FLIPADST:
#endif
aom_iadst16_sse2(in0, in1);
aom_iadst16_sse2(in2, in3);
break;
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case H_FLIPADST:
@@ -1488,19 +1604,24 @@ void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest,
iidtx16_sse2(in0, in1);
iidtx16_sse2(in2, in3);
break;
#endif
default: assert(0); break;
}
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case H_DCT:
case DCT_ADST:
case ADST_ADST:
#if CONFIG_EXT_TX
case H_DCT:
case H_ADST:
case V_ADST:
case V_DCT:
case IDTX: break;
case IDTX:
#endif
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST: FLIPUD_PTR(dest, stride, 16); break;
@@ -1527,8 +1648,8 @@ void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest,
}
FLIPUD_PTR(dest, stride, 16);
break;
#endif
default: assert(0); break;
}
write_buffer_32x16_round6(dest, in0, in1, in2, in3, stride);
}
#endif // CONFIG_EXT_TX