Merge "Support rectangular tx_size in the common lib" into nextgenv2
This commit is contained in:
@@ -389,25 +389,23 @@ if (aom_config("CONFIG_TX64X64") eq "yes") {
|
||||
specialize qw/av1_fht64x64/;
|
||||
}
|
||||
|
||||
if (aom_config("CONFIG_EXT_TX") eq "yes") {
|
||||
add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht4x8 sse2/;
|
||||
add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht4x8 sse2/;
|
||||
|
||||
add_proto qw/void av1_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht8x4 sse2/;
|
||||
add_proto qw/void av1_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht8x4 sse2/;
|
||||
|
||||
add_proto qw/void av1_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht8x16 sse2/;
|
||||
add_proto qw/void av1_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht8x16 sse2/;
|
||||
|
||||
add_proto qw/void av1_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht16x8 sse2/;
|
||||
add_proto qw/void av1_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht16x8 sse2/;
|
||||
|
||||
add_proto qw/void av1_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht16x32 sse2/;
|
||||
add_proto qw/void av1_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht16x32 sse2/;
|
||||
|
||||
add_proto qw/void av1_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht32x16 sse2/;
|
||||
}
|
||||
add_proto qw/void av1_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/av1_fht32x16 sse2/;
|
||||
|
||||
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") ne "yes") {
|
||||
if (aom_config("CONFIG_EXT_TX") ne "yes") {
|
||||
|
@@ -55,7 +55,6 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
|
||||
int diff_stride, TX_TYPE tx_type,
|
||||
FWD_TXFM_OPT fwd_txfm_opt) {
|
||||
@@ -97,7 +96,6 @@ static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
|
||||
(void)fwd_txfm_opt;
|
||||
av1_fht32x16(src_diff, coeff, diff_stride, tx_type);
|
||||
}
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
|
||||
int diff_stride, TX_TYPE tx_type,
|
||||
@@ -233,7 +231,6 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
|
||||
int diff_stride, TX_TYPE tx_type,
|
||||
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
|
||||
@@ -281,7 +278,6 @@ static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
|
||||
(void)bd;
|
||||
av1_highbd_fht32x16(src_diff, coeff, diff_stride, tx_type);
|
||||
}
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
|
||||
int diff_stride, TX_TYPE tx_type,
|
||||
@@ -403,7 +399,6 @@ void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
|
||||
case TX_8X8:
|
||||
fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case TX_4X8:
|
||||
fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
|
||||
break;
|
||||
@@ -422,7 +417,6 @@ void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
|
||||
case TX_32X16:
|
||||
fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
case TX_4X4:
|
||||
fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
|
||||
break;
|
||||
@@ -452,7 +446,6 @@ void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
|
||||
highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
|
||||
bd);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case TX_4X8:
|
||||
highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
|
||||
bd);
|
||||
@@ -477,7 +470,6 @@ void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
|
||||
highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
|
||||
bd);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
case TX_4X4:
|
||||
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
|
||||
break;
|
||||
|
@@ -2592,7 +2592,6 @@ void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
static INLINE void scale_sqrt2_8x4(__m128i *in) {
|
||||
// Implements fdct_round_shift(input * Sqrt2), which is equivalent to
|
||||
// ROUND_POWER_OF_TWO(input * Sqrt2, DCT_CONST_BITS),
|
||||
@@ -2767,9 +2766,9 @@ void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
int tx_type) {
|
||||
__m128i in[8];
|
||||
|
||||
load_buffer_4x8(input, in, stride, 0, 0);
|
||||
switch (tx_type) {
|
||||
case DCT_DCT:
|
||||
load_buffer_4x8(input, in, stride, 0, 0);
|
||||
fdct8_sse2(in);
|
||||
// Repack data into two 4x4 blocks so we can reuse the 4x4 transforms
|
||||
// The other cases (and the 8x4 transforms) all behave similarly
|
||||
@@ -2781,7 +2780,6 @@ void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fdct4_sse2(in + 4);
|
||||
break;
|
||||
case ADST_DCT:
|
||||
load_buffer_4x8(input, in, stride, 0, 0);
|
||||
fadst8_sse2(in);
|
||||
in[4] = _mm_shuffle_epi32(in[0], 0xe);
|
||||
in[5] = _mm_shuffle_epi32(in[1], 0xe);
|
||||
@@ -2791,7 +2789,6 @@ void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fdct4_sse2(in + 4);
|
||||
break;
|
||||
case DCT_ADST:
|
||||
load_buffer_4x8(input, in, stride, 0, 0);
|
||||
fdct8_sse2(in);
|
||||
in[4] = _mm_shuffle_epi32(in[0], 0xe);
|
||||
in[5] = _mm_shuffle_epi32(in[1], 0xe);
|
||||
@@ -2801,7 +2798,6 @@ void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst4_sse2(in + 4);
|
||||
break;
|
||||
case ADST_ADST:
|
||||
load_buffer_4x8(input, in, stride, 0, 0);
|
||||
fadst8_sse2(in);
|
||||
in[4] = _mm_shuffle_epi32(in[0], 0xe);
|
||||
in[5] = _mm_shuffle_epi32(in[1], 0xe);
|
||||
@@ -2810,6 +2806,7 @@ void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst4_sse2(in);
|
||||
fadst4_sse2(in + 4);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
load_buffer_4x8(input, in, stride, 1, 0);
|
||||
fadst8_sse2(in);
|
||||
@@ -2930,6 +2927,7 @@ void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst4_sse2(in);
|
||||
fadst4_sse2(in + 4);
|
||||
break;
|
||||
#endif
|
||||
default: assert(0); break;
|
||||
}
|
||||
write_buffer_4x8(output, in);
|
||||
@@ -3023,6 +3021,7 @@ void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst4_sse2(in + 4);
|
||||
fadst8_sse2(in);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
load_buffer_8x4(input, in, stride, 1, 0);
|
||||
fadst4_sse2(in);
|
||||
@@ -3095,6 +3094,7 @@ void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fidtx4_sse2(in + 4);
|
||||
fadst8_sse2(in);
|
||||
break;
|
||||
#endif
|
||||
default: assert(0); break;
|
||||
}
|
||||
write_buffer_8x4(output, in);
|
||||
@@ -3158,6 +3158,7 @@ void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst8_sse2(t);
|
||||
fadst8_sse2(b);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
load_buffer_8x16(input, in, stride, 1, 0);
|
||||
fadst16_8col(in);
|
||||
@@ -3254,6 +3255,7 @@ void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst8_sse2(t);
|
||||
fadst8_sse2(b);
|
||||
break;
|
||||
#endif
|
||||
default: assert(0); break;
|
||||
}
|
||||
right_shift_8x8(t, 2);
|
||||
@@ -3314,6 +3316,7 @@ void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst8_sse2(r);
|
||||
fadst16_8col(in);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
load_buffer_16x8(input, in, stride, 1, 0);
|
||||
fadst8_sse2(l);
|
||||
@@ -3386,6 +3389,7 @@ void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fidtx8_sse2(r);
|
||||
fadst16_8col(in);
|
||||
break;
|
||||
#endif
|
||||
default: assert(0); break;
|
||||
}
|
||||
array_transpose_8x8(l, l);
|
||||
@@ -3436,6 +3440,7 @@ static INLINE void fhalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
|
||||
fdct16_sse2(tl, tr);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
static INLINE void fidtx32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
|
||||
__m128i *br) {
|
||||
int i;
|
||||
@@ -3448,6 +3453,7 @@ static INLINE void fidtx32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
|
||||
array_transpose_16x16(tl, tr);
|
||||
array_transpose_16x16(bl, br);
|
||||
}
|
||||
#endif
|
||||
|
||||
static INLINE void load_buffer_16x32(const int16_t *input, __m128i *intl,
|
||||
__m128i *intr, __m128i *inbl,
|
||||
@@ -3507,7 +3513,7 @@ static INLINE void write_buffer_16x32(tran_low_t *output, __m128i *restl,
|
||||
}
|
||||
}
|
||||
|
||||
// Note on data layout, for both this and the 32x16 tranforms:
|
||||
// Note on data layout, for both this and the 32x16 transforms:
|
||||
// So that we can reuse the 16-element transforms easily,
|
||||
// we want to split the input into 8x16 blocks.
|
||||
// For 16x32, this means the input is a 2x2 grid of such blocks.
|
||||
@@ -3541,6 +3547,7 @@ void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst16_sse2(intl, intr);
|
||||
fadst16_sse2(inbl, inbr);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
|
||||
fhalfright32_16col(intl, intr, inbl, inbr);
|
||||
@@ -3613,6 +3620,7 @@ void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fadst16_sse2(intl, intr);
|
||||
fadst16_sse2(inbl, inbr);
|
||||
break;
|
||||
#endif
|
||||
default: assert(0); break;
|
||||
}
|
||||
write_buffer_16x32(output, intl, intr, inbl, inbr);
|
||||
@@ -3671,31 +3679,29 @@ void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
int tx_type) {
|
||||
__m128i in0[16], in1[16], in2[16], in3[16];
|
||||
|
||||
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
|
||||
switch (tx_type) {
|
||||
case DCT_DCT:
|
||||
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
|
||||
fdct16_sse2(in0, in1);
|
||||
fdct16_sse2(in2, in3);
|
||||
fdct32_16col(in0, in1, in2, in3);
|
||||
break;
|
||||
case ADST_DCT:
|
||||
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
|
||||
fadst16_sse2(in0, in1);
|
||||
fadst16_sse2(in2, in3);
|
||||
fdct32_16col(in0, in1, in2, in3);
|
||||
break;
|
||||
case DCT_ADST:
|
||||
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
|
||||
fdct16_sse2(in0, in1);
|
||||
fdct16_sse2(in2, in3);
|
||||
fhalfright32_16col(in0, in1, in2, in3);
|
||||
break;
|
||||
case ADST_ADST:
|
||||
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
|
||||
fadst16_sse2(in0, in1);
|
||||
fadst16_sse2(in2, in3);
|
||||
fhalfright32_16col(in0, in1, in2, in3);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
|
||||
fadst16_sse2(in0, in1);
|
||||
@@ -3768,8 +3774,8 @@ void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride,
|
||||
fidtx16_sse2(in2, in3);
|
||||
fhalfright32_16col(in0, in1, in2, in3);
|
||||
break;
|
||||
#endif
|
||||
default: assert(0); break;
|
||||
}
|
||||
write_buffer_32x16(output, in0, in1, in2, in3);
|
||||
}
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
Reference in New Issue
Block a user