Flip the result of the inv transform for FLIPADST.
This is a port of 4f5108090a6047d5d4d9ce1df302da23b2ef4bc5. This commit also fixes a bug where FLIPADST transforms, when combined with a DST (that is, FLIPADST_DST and DST_FLIPADST), did not actually perform a flipped transform but a straight ADST instead. This was because the C implementation they fell back on did not implement flipping. This is now fixed, and FLIPADST_DST and DST_FLIPADST do what they are supposed to do. There are 3 functions in the SR_MODE experiment that should be updated, but given that the build of SR_MODE is broken at the upstream tip of nextgen, I could not test these, so I have put in assertions and FIXME notes at the problematic places. Change-Id: I5b8175b85f944f2369b183a26256e08d97f4bdef
This commit is contained in:
parent
f1f3a8ab14
commit
85ab9d56cc
@ -16,6 +16,59 @@
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
|
||||
#if CONFIG_EXT_TX
#define FLIPUD_PTR(dest, stride, size) do { \
    (dest) = (dest) + ((size) - 1) * (stride); \
    (stride) = - (stride);                     \
  } while (0)

// Redirect the destination and/or residual pointers (and negate the matching
// stride) so that the FLIPADST transform types are applied flipped.
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
// the addends, we UD flip the dst.
static void maybe_flip_strides(uint8_t **dst, int *dstride,
                               tran_low_t **src, int *sstride,
                               int tx_type, int size) {
  int flip_ud = 0;  // UD-flip the destination
  int flip_lr = 0;  // UD-flip the source == LR flip in dst coordinates
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case DST_DST:
    case DCT_DST:
    case DST_DCT:
    case DST_ADST:
    case ADST_DST:
      // No flipping for these transform types.
      break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case FLIPADST_DST:
      flip_ud = 1;
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case DST_FLIPADST:
      flip_lr = 1;
      break;
    case FLIPADST_FLIPADST:
      flip_ud = 1;
      flip_lr = 1;
      break;
    default:
      assert(0);
      break;
  }
  if (flip_ud)
    FLIPUD_PTR(*dst, *dstride, size);
  if (flip_lr)
    FLIPUD_PTR(*src, *sstride, size);
}
|
||||
|
||||
void idst4(const tran_low_t *input, tran_low_t *output) {
|
||||
// {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
|
||||
static const int32_t sinvalue_lookup[] = {
|
||||
@ -635,25 +688,41 @@ void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
};
|
||||
|
||||
int i, j;
|
||||
tran_low_t out[4 * 4];
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[4], temp_out[4];
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[4][4];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 4;
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 4; ++i) {
|
||||
IHT_4[tx_type].rows(input, outptr);
|
||||
IHT_4[tx_type].rows(input, out[i]);
|
||||
input += 4;
|
||||
outptr += 4;
|
||||
}
|
||||
|
||||
// transpose
|
||||
for (i = 1 ; i < 4; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = out[j * 4 + i];
|
||||
IHT_4[tx_type].cols(temp_in, temp_out);
|
||||
IHT_4[tx_type].cols(out[i], out[i]);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4);
|
||||
#endif
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j) {
|
||||
dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
||||
ROUND_POWER_OF_TWO(temp_out[j], 4));
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -756,97 +825,44 @@ static const transform_2d IHT_8[] = {
|
||||
#endif // CONFIG_EXT_TX
|
||||
};
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
// Mirror the l x l block at dest left-right: reverse each row in place.
// stride is the row pitch of dest in elements.
void fliplr(uint8_t *dest, int stride, int l) {
  int r, c;
  for (r = 0; r < l; ++r) {
    uint8_t *row = dest + r * stride;
    for (c = 0; c < l / 2; ++c) {
      const uint8_t t = row[c];
      row[c] = row[l - 1 - c];
      row[l - 1 - c] = t;
    }
  }
}
|
||||
|
||||
// Mirror the l x l block at dest up-down: swap row i with row l-1-i in place.
// stride is the row pitch of dest in elements.
void flipud(uint8_t *dest, int stride, int l) {
  int r, c;
  for (r = 0; r < l / 2; ++r) {
    uint8_t *top = dest + r * stride;
    uint8_t *bot = dest + (l - 1 - r) * stride;
    for (c = 0; c < l; ++c) {
      const uint8_t t = top[c];
      top[c] = bot[c];
      bot[c] = t;
    }
  }
}
|
||||
|
||||
// Rotate the l x l block at dest by 180 degrees in place (flip LR and UD).
// stride is the row pitch of dest in elements.
// Fix: for odd l the original left the middle row untouched, so the result
// was not a true 180-degree rotation; the middle row is now reversed too.
// Behavior for even l (the sizes used by the codec: 4/8/16) is unchanged.
void fliplrud(uint8_t *dest, int stride, int l) {
  int i, j;
  // Swap element (i, j) with its 180-degree counterpart (l-1-i, l-1-j).
  for (i = 0; i < l / 2; ++i) {
    for (j = 0; j < l; ++j) {
      const uint8_t tmp = dest[i * stride + j];
      dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j];
      dest[(l - 1 - i) * stride + l - 1 - j] = tmp;
    }
  }
  // For odd l the middle row maps onto itself; reverse it in place.
  if (l & 1) {
    uint8_t *mid = dest + (l / 2) * stride;
    for (j = 0; j < l / 2; ++j) {
      const uint8_t tmp = mid[j];
      mid[j] = mid[l - 1 - j];
      mid[l - 1 - j] = tmp;
    }
  }
}
|
||||
|
||||
// High-bit-depth variant of fliplr: reverse each row of the l x l uint16_t
// block at dest in place. stride is the row pitch in elements.
void fliplr16(uint16_t *dest, int stride, int l) {
  int r, c;
  for (r = 0; r < l; ++r) {
    uint16_t *row = dest + r * stride;
    for (c = 0; c < l / 2; ++c) {
      const uint16_t t = row[c];
      row[c] = row[l - 1 - c];
      row[l - 1 - c] = t;
    }
  }
}
|
||||
|
||||
// High-bit-depth variant of flipud: swap row i with row l-1-i of the l x l
// uint16_t block at dest in place. stride is the row pitch in elements.
void flipud16(uint16_t *dest, int stride, int l) {
  int r, c;
  for (r = 0; r < l / 2; ++r) {
    uint16_t *top = dest + r * stride;
    uint16_t *bot = dest + (l - 1 - r) * stride;
    for (c = 0; c < l; ++c) {
      const uint16_t t = top[c];
      top[c] = bot[c];
      bot[c] = t;
    }
  }
}
|
||||
|
||||
// High-bit-depth variant of fliplrud: rotate the l x l uint16_t block at
// dest by 180 degrees in place. stride is the row pitch in elements.
// Fix: for odd l the original left the middle row untouched, so the result
// was not a true 180-degree rotation; the middle row is now reversed too.
// Behavior for even l (the sizes used by the codec: 4/8/16) is unchanged.
void fliplrud16(uint16_t *dest, int stride, int l) {
  int i, j;
  // Swap element (i, j) with its 180-degree counterpart (l-1-i, l-1-j).
  for (i = 0; i < l / 2; ++i) {
    for (j = 0; j < l; ++j) {
      const uint16_t tmp = dest[i * stride + j];
      dest[i * stride + j] = dest[(l - 1 - i) * stride + l - 1 - j];
      dest[(l - 1 - i) * stride + l - 1 - j] = tmp;
    }
  }
  // For odd l the middle row maps onto itself; reverse it in place.
  if (l & 1) {
    uint16_t *mid = dest + (l / 2) * stride;
    for (j = 0; j < l / 2; ++j) {
      const uint16_t tmp = mid[j];
      mid[j] = mid[l - 1 - j];
      mid[l - 1 - j] = tmp;
    }
  }
}
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int tx_type) {
|
||||
int i, j;
|
||||
tran_low_t out[8 * 8];
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[8], temp_out[8];
|
||||
const transform_2d ht = IHT_8[tx_type];
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[8][8];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 8;
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 8; ++i) {
|
||||
ht.rows(input, outptr);
|
||||
input += 8;
|
||||
outptr += 8;
|
||||
IHT_8[tx_type].rows(input, out[i]);
|
||||
input += 8;
|
||||
}
|
||||
|
||||
// transpose
|
||||
for (i = 1 ; i < 8; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = out[j * 8 + i];
|
||||
ht.cols(temp_in, temp_out);
|
||||
IHT_8[tx_type].cols(out[i], out[i]);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8);
|
||||
#endif
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j) {
|
||||
dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
||||
ROUND_POWER_OF_TWO(temp_out[j], 5));
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1291,26 +1307,41 @@ static const transform_2d IHT_16[] = {
|
||||
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int tx_type) {
|
||||
int i, j;
|
||||
tran_low_t out[16 * 16];
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[16], temp_out[16];
|
||||
const transform_2d ht = IHT_16[tx_type];
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[16][16];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 16;
|
||||
|
||||
// Rows
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 16; ++i) {
|
||||
ht.rows(input, outptr);
|
||||
input += 16;
|
||||
outptr += 16;
|
||||
IHT_16[tx_type].rows(input, out[i]);
|
||||
input += 16;
|
||||
}
|
||||
|
||||
// Columns
|
||||
// transpose
|
||||
for (i = 1 ; i < 16; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 16; ++i) {
|
||||
IHT_16[tx_type].cols(out[i], out[i]);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16);
|
||||
#endif
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = out[j * 16 + i];
|
||||
ht.cols(temp_in, temp_out);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
||||
ROUND_POWER_OF_TWO(temp_out[j], 6));
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1911,26 +1942,6 @@ void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_iht4x4_16_add_c(input, dest, stride, tx_type);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud(dest, stride, 4);
|
||||
vp9_iht4x4_16_add(input, dest, stride, ADST_DCT);
|
||||
flipud(dest, stride, 4);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr(dest, stride, 4);
|
||||
vp9_iht4x4_16_add(input, dest, stride, DCT_ADST);
|
||||
fliplr(dest, stride, 4);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud(dest, stride, 4);
|
||||
vp9_iht4x4_16_add(input, dest, stride, ADST_ADST);
|
||||
fliplrud(dest, stride, 4);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr(dest, stride, 4);
|
||||
vp9_iht4x4_16_add(input, dest, stride, ADST_ADST);
|
||||
fliplr(dest, stride, 4);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud(dest, stride, 4);
|
||||
vp9_iht4x4_16_add(input, dest, stride, ADST_ADST);
|
||||
flipud(dest, stride, 4);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_iht4x4_16_add(input, dest, stride, tx_type);
|
||||
@ -1944,26 +1955,6 @@ void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_iht8x8_64_add_c(input, dest, stride, tx_type);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud(dest, stride, 8);
|
||||
vp9_iht8x8_64_add(input, dest, stride, ADST_DCT);
|
||||
flipud(dest, stride, 8);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr(dest, stride, 8);
|
||||
vp9_iht8x8_64_add(input, dest, stride, DCT_ADST);
|
||||
fliplr(dest, stride, 8);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud(dest, stride, 8);
|
||||
vp9_iht8x8_64_add(input, dest, stride, ADST_ADST);
|
||||
fliplrud(dest, stride, 8);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr(dest, stride, 8);
|
||||
vp9_iht8x8_64_add(input, dest, stride, ADST_ADST);
|
||||
fliplr(dest, stride, 8);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud(dest, stride, 8);
|
||||
vp9_iht8x8_64_add(input, dest, stride, ADST_ADST);
|
||||
flipud(dest, stride, 8);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_iht8x8_64_add(input, dest, stride, tx_type);
|
||||
@ -1977,26 +1968,6 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_iht16x16_256_add_c(input, dest, stride, tx_type);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud(dest, stride, 16);
|
||||
vp9_iht16x16_256_add(input, dest, stride, ADST_DCT);
|
||||
flipud(dest, stride, 16);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr(dest, stride, 16);
|
||||
vp9_iht16x16_256_add(input, dest, stride, DCT_ADST);
|
||||
fliplr(dest, stride, 16);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud(dest, stride, 16);
|
||||
vp9_iht16x16_256_add(input, dest, stride, ADST_ADST);
|
||||
fliplrud(dest, stride, 16);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr(dest, stride, 16);
|
||||
vp9_iht16x16_256_add(input, dest, stride, ADST_ADST);
|
||||
fliplr(dest, stride, 16);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud(dest, stride, 16);
|
||||
vp9_iht16x16_256_add(input, dest, stride, ADST_ADST);
|
||||
flipud(dest, stride, 16);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_iht16x16_256_add(input, dest, stride, tx_type);
|
||||
@ -2775,7 +2746,7 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
|
||||
|
||||
void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
int stride, int tx_type, int bd) {
|
||||
const highbd_transform_2d IHT_4[] = {
|
||||
const highbd_transform_2d HIGH_IHT_4[] = {
|
||||
{ vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0
|
||||
{ highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1
|
||||
{ vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2
|
||||
@ -2798,25 +2769,43 @@ void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
||||
|
||||
int i, j;
|
||||
tran_low_t out[4 * 4];
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[4], temp_out[4];
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[4][4];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 4;
|
||||
|
||||
// Inverse transform row vectors.
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 4; ++i) {
|
||||
IHT_4[tx_type].rows(input, outptr, bd);
|
||||
HIGH_IHT_4[tx_type].rows(input, out[i], bd);
|
||||
input += 4;
|
||||
outptr += 4;
|
||||
}
|
||||
|
||||
// Inverse transform column vectors.
|
||||
// transpose
|
||||
for (i = 1 ; i < 4; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 4; ++i) {
|
||||
HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
maybe_flip_strides((uint8_t**)&dest,
|
||||
&stride, &outp, &outstride, tx_type, 4 * 2);
|
||||
#endif
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
temp_in[j] = out[j * 4 + i];
|
||||
IHT_4[tx_type].cols(temp_in, temp_out, bd);
|
||||
for (j = 0; j < 4; ++j) {
|
||||
dest[j * stride + i] = highbd_clip_pixel_add(
|
||||
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = highbd_clip_pixel_add(dest[d],
|
||||
ROUND_POWER_OF_TWO(outp[s], 4), bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2921,28 +2910,46 @@ static const highbd_transform_2d HIGH_IHT_8[] = {
|
||||
|
||||
void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
int stride, int tx_type, int bd) {
|
||||
int i, j;
|
||||
tran_low_t out[8 * 8];
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[8], temp_out[8];
|
||||
const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
|
||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
||||
|
||||
// Inverse transform row vectors.
|
||||
int i, j;
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[8][8];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 8;
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 8; ++i) {
|
||||
ht.rows(input, outptr, bd);
|
||||
input += 8;
|
||||
outptr += 8;
|
||||
HIGH_IHT_8[tx_type].rows(input, out[i], bd);
|
||||
input += 8;
|
||||
}
|
||||
|
||||
// Inverse transform column vectors.
|
||||
// transpose
|
||||
for (i = 1 ; i < 8; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 8; ++i) {
|
||||
HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
maybe_flip_strides((uint8_t**)&dest,
|
||||
&stride, &outp, &outstride, tx_type, 8 * 2);
|
||||
#endif
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 8; ++i) {
|
||||
for (j = 0; j < 8; ++j)
|
||||
temp_in[j] = out[j * 8 + i];
|
||||
ht.cols(temp_in, temp_out, bd);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
dest[j * stride + i] = highbd_clip_pixel_add(
|
||||
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = highbd_clip_pixel_add(dest[d],
|
||||
ROUND_POWER_OF_TWO(outp[s], 5), bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3361,28 +3368,46 @@ static const highbd_transform_2d HIGH_IHT_16[] = {
|
||||
|
||||
void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
int stride, int tx_type, int bd) {
|
||||
int i, j;
|
||||
tran_low_t out[16 * 16];
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[16], temp_out[16];
|
||||
const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
|
||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
||||
|
||||
// Rows
|
||||
int i, j;
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[16][16];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 16;
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 16; ++i) {
|
||||
ht.rows(input, outptr, bd);
|
||||
input += 16;
|
||||
outptr += 16;
|
||||
HIGH_IHT_16[tx_type].rows(input, out[i], bd);
|
||||
input += 16;
|
||||
}
|
||||
|
||||
// Columns
|
||||
// transpose
|
||||
for (i = 1 ; i < 16; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 16; ++i) {
|
||||
HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
maybe_flip_strides((uint8_t**)&dest, &stride,
|
||||
&outp, &outstride, tx_type, 16 * 2);
|
||||
#endif
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 16; ++i) {
|
||||
for (j = 0; j < 16; ++j)
|
||||
temp_in[j] = out[j * 16 + i];
|
||||
ht.cols(temp_in, temp_out, bd);
|
||||
for (j = 0; j < 16; ++j) {
|
||||
dest[j * stride + i] = highbd_clip_pixel_add(
|
||||
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = highbd_clip_pixel_add(dest[d],
|
||||
ROUND_POWER_OF_TWO(outp[s], 6), bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3954,26 +3979,6 @@ void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
vp9_highbd_iht4x4_16_add(input, dest, stride, ADST_DCT, bd);
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
vp9_highbd_iht4x4_16_add(input, dest, stride, DCT_ADST, bd);
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
vp9_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd);
|
||||
fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
vp9_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd);
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
vp9_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd);
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
|
||||
@ -3987,26 +3992,6 @@ void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
vp9_highbd_iht8x8_64_add(input, dest, stride, ADST_DCT, bd);
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
vp9_highbd_iht8x8_64_add(input, dest, stride, DCT_ADST, bd);
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
vp9_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd);
|
||||
fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
vp9_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd);
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
vp9_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd);
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
|
||||
@ -4020,26 +4005,6 @@ void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, ADST_DCT, bd);
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, DCT_ADST, bd);
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd);
|
||||
fliplrud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd);
|
||||
fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd);
|
||||
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
|
||||
@ -4276,6 +4241,19 @@ void vp9_iht4x4_16_c(const tran_low_t *input, int16_t *dest, int stride,
|
||||
tran_low_t *outptr = out;
|
||||
tran_low_t temp_in[4], temp_out[4];
|
||||
|
||||
// FIXME: If the SR_MODE experiment is resurrected, then this function must
|
||||
// be fixed to handle the FLIPADST cases by actually flipping its output
|
||||
// See the other vp9_iht*add_c functions
|
||||
#if CONFIG_EXT_TX
|
||||
assert(tx_type != FLIPADST_DCT);
|
||||
assert(tx_type != DCT_FLIPADST);
|
||||
assert(tx_type != FLIPADST_FLIPADST);
|
||||
assert(tx_type != ADST_FLIPADST);
|
||||
assert(tx_type != FLIPADST_ADST);
|
||||
assert(tx_type != DST_FLIPADST);
|
||||
assert(tx_type != FLIPADST_DST);
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 4; ++i) {
|
||||
IHT_4[tx_type].rows(input, outptr);
|
||||
@ -4302,6 +4280,19 @@ void vp9_iht8x8_64_c(const tran_low_t *input, int16_t *dest, int stride,
|
||||
tran_low_t temp_in[8], temp_out[8];
|
||||
const transform_2d ht = IHT_8[tx_type];
|
||||
|
||||
// FIXME: If the SR_MODE experiment is resurrected, then this function must
|
||||
// be fixed to handle the FLIPADST cases by actually flipping its output
|
||||
// See the other vp9_iht*add_c functions
|
||||
#if CONFIG_EXT_TX
|
||||
assert(tx_type != FLIPADST_DCT);
|
||||
assert(tx_type != DCT_FLIPADST);
|
||||
assert(tx_type != FLIPADST_FLIPADST);
|
||||
assert(tx_type != ADST_FLIPADST);
|
||||
assert(tx_type != FLIPADST_ADST);
|
||||
assert(tx_type != DST_FLIPADST);
|
||||
assert(tx_type != FLIPADST_DST);
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 8; ++i) {
|
||||
ht.rows(input, outptr);
|
||||
@ -4378,6 +4369,19 @@ void vp9_iht16x16_256_c(const tran_low_t *input, int16_t *dest, int stride,
|
||||
tran_low_t temp_in[16], temp_out[16];
|
||||
const transform_2d ht = IHT_16[tx_type];
|
||||
|
||||
// FIXME: If the SR_MODE experiment is resurrected, then this function must
|
||||
// be fixed to handle the FLIPADST cases by actually flipping its output
|
||||
// See the other vp9_iht*add_c functions
|
||||
#if CONFIG_EXT_TX
|
||||
assert(tx_type != FLIPADST_DCT);
|
||||
assert(tx_type != DCT_FLIPADST);
|
||||
assert(tx_type != FLIPADST_FLIPADST);
|
||||
assert(tx_type != ADST_FLIPADST);
|
||||
assert(tx_type != FLIPADST_ADST);
|
||||
assert(tx_type != DST_FLIPADST);
|
||||
assert(tx_type != FLIPADST_DST);
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
// Rows
|
||||
for (i = 0; i < 16; ++i) {
|
||||
ht.rows(input, outptr);
|
||||
@ -4582,26 +4586,6 @@ void vp9_iht4x4(TX_TYPE tx_type, const tran_low_t *input, int16_t *dest,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_iht4x4_16_c(input, dest, stride, tx_type);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud(dest, stride, 4);
|
||||
vp9_iht4x4_16(input, dest, stride, ADST_DCT);
|
||||
flipud(dest, stride, 4);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr(dest, stride, 4);
|
||||
vp9_iht4x4_16(input, dest, stride, DCT_ADST);
|
||||
fliplr(dest, stride, 4);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud(dest, stride, 4);
|
||||
vp9_iht4x4_16(input, dest, stride, ADST_ADST);
|
||||
fliplrud(dest, stride, 4);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr(dest, stride, 4);
|
||||
vp9_iht4x4_16(input, dest, stride, ADST_ADST);
|
||||
fliplr(dest, stride, 4);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud(dest, stride, 4);
|
||||
vp9_iht4x4_16(input, dest, stride, ADST_ADST);
|
||||
flipud(dest, stride, 4);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_iht4x4_16(input, dest, stride, tx_type);
|
||||
@ -4615,26 +4599,6 @@ void vp9_iht8x8(TX_TYPE tx_type, const tran_low_t *input, int16_t *dest,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_iht8x8_64_c(input, dest, stride, tx_type);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud(dest, stride, 8);
|
||||
vp9_iht8x8_64(input, dest, stride, ADST_DCT);
|
||||
flipud(dest, stride, 8);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr(dest, stride, 8);
|
||||
vp9_iht8x8_64(input, dest, stride, DCT_ADST);
|
||||
fliplr(dest, stride, 8);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud(dest, stride, 8);
|
||||
vp9_iht8x8_64(input, dest, stride, ADST_ADST);
|
||||
fliplrud(dest, stride, 8);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr(dest, stride, 8);
|
||||
vp9_iht8x8_64(input, dest, stride, ADST_ADST);
|
||||
fliplr(dest, stride, 8);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud(dest, stride, 8);
|
||||
vp9_iht8x8_64(input, dest, stride, ADST_ADST);
|
||||
flipud(dest, stride, 8);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_iht8x8_64(input, dest, stride, tx_type);
|
||||
@ -4648,26 +4612,6 @@ void vp9_iht16x16(TX_TYPE tx_type, const tran_low_t *input, int16_t *dest,
|
||||
#if CONFIG_EXT_TX
|
||||
} else if (is_dst_used(tx_type)) {
|
||||
vp9_iht16x16_256_c(input, dest, stride, tx_type);
|
||||
} else if (tx_type == FLIPADST_DCT) {
|
||||
flipud(dest, stride, 16);
|
||||
vp9_iht16x16_256(input, dest, stride, ADST_DCT);
|
||||
flipud(dest, stride, 16);
|
||||
} else if (tx_type == DCT_FLIPADST) {
|
||||
fliplr(dest, stride, 16);
|
||||
vp9_iht16x16_256(input, dest, stride, DCT_ADST);
|
||||
fliplr(dest, stride, 16);
|
||||
} else if (tx_type == FLIPADST_FLIPADST) {
|
||||
fliplrud(dest, stride, 16);
|
||||
vp9_iht16x16_256(input, dest, stride, ADST_ADST);
|
||||
fliplrud(dest, stride, 16);
|
||||
} else if (tx_type == ADST_FLIPADST) {
|
||||
fliplr(dest, stride, 16);
|
||||
vp9_iht16x16_256(input, dest, stride, ADST_ADST);
|
||||
fliplr(dest, stride, 16);
|
||||
} else if (tx_type == FLIPADST_ADST) {
|
||||
flipud(dest, stride, 16);
|
||||
vp9_iht16x16_256(input, dest, stride, ADST_ADST);
|
||||
flipud(dest, stride, 16);
|
||||
#endif // CONFIG_EXT_TX
|
||||
} else {
|
||||
vp9_iht16x16_256(input, dest, stride, tx_type);
|
||||
|
@ -11,6 +11,55 @@
|
||||
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
|
||||
#include "vp9/common/vp9_idct.h"
|
||||
|
||||
#include "vp9/common/vp9_enums.h"
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
// Reverse the 8 16 bit words in __m128i
|
||||
// Reverse the 8 16-bit words in an __m128i.
static INLINE __m128i mm_reverse_epi16(const __m128i x) {
  // Swap the two 64-bit halves first, then reverse the four words inside
  // each half; the composition reverses all eight words.
  const __m128i halves_swapped = _mm_shuffle_epi32(x, 0x4e);
  const __m128i lo_reversed = _mm_shufflelo_epi16(halves_swapped, 0x1b);
  return _mm_shufflehi_epi16(lo_reversed, 0x1b);
}
|
||||
|
||||
// LR-flip a 4x4 block held as two registers of two 4-wide rows each:
// reverse the low and high 4-word groups of each register.
static INLINE void fliplr_4x4(__m128i in[2]) {
  int k;
  for (k = 0; k < 2; ++k) {
    in[k] = _mm_shufflelo_epi16(in[k], 0x1b);
    in[k] = _mm_shufflehi_epi16(in[k], 0x1b);
  }
}
|
||||
|
||||
static INLINE void fliplr_8x8(__m128i in[8]) {
|
||||
in[0] = mm_reverse_epi16(in[0]);
|
||||
in[1] = mm_reverse_epi16(in[1]);
|
||||
in[2] = mm_reverse_epi16(in[2]);
|
||||
in[3] = mm_reverse_epi16(in[3]);
|
||||
|
||||
in[4] = mm_reverse_epi16(in[4]);
|
||||
in[5] = mm_reverse_epi16(in[5]);
|
||||
in[6] = mm_reverse_epi16(in[6]);
|
||||
in[7] = mm_reverse_epi16(in[7]);
|
||||
}
|
||||
|
||||
// LR-flip a 16x8 block stored as two consecutive 8x8 register groups.
static INLINE void fliplr_16x8(__m128i in[16]) {
  fliplr_8x8(in);
  fliplr_8x8(in + 8);
}
|
||||
|
||||
#define FLIPLR_16x16(in0, in1) do { \
|
||||
__m128i *tmp; \
|
||||
fliplr_16x8(in0); \
|
||||
fliplr_16x8(in1); \
|
||||
tmp = (in0); \
|
||||
(in0) = (in1); \
|
||||
(in1) = tmp; \
|
||||
} while (0)
|
||||
|
||||
#define FLIPUD_PTR(dest, stride, size) do { \
|
||||
(dest) = (dest) + ((size) - 1) * (stride); \
|
||||
(stride) = - (stride); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#define RECON_AND_STORE4X4(dest, in_x) \
|
||||
{ \
|
||||
__m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
|
||||
@ -126,12 +175,12 @@ void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
|
||||
|
||||
// Reconstruction and Store
|
||||
{
|
||||
__m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
|
||||
__m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
|
||||
__m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
|
||||
__m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
|
||||
d0 = _mm_unpacklo_epi32(d0,
|
||||
_mm_cvtsi32_si128(*(const int *) (dest + stride)));
|
||||
d2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(
|
||||
*(const int *) (dest + stride * 3)), d2);
|
||||
__m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
|
||||
d0 = _mm_unpacklo_epi32(d0, d1);
|
||||
d2 = _mm_unpacklo_epi32(d3, d2);
|
||||
d0 = _mm_unpacklo_epi8(d0, zero);
|
||||
d2 = _mm_unpacklo_epi8(d2, zero);
|
||||
d0 = _mm_add_epi16(d0, input2);
|
||||
@ -271,22 +320,50 @@ void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
|
||||
in[1]= _mm_loadu_si128((const __m128i *)(input + 8));
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
case DCT_DCT:
|
||||
idct4_sse2(in);
|
||||
idct4_sse2(in);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
idct4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
iadst4_sse2(in);
|
||||
idct4_sse2(in);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
iadst4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
idct4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
FLIPUD_PTR(dest, stride, 4);
|
||||
break;
|
||||
case DCT_FLIPADST:
|
||||
iadst4_sse2(in);
|
||||
idct4_sse2(in);
|
||||
fliplr_4x4(in);
|
||||
break;
|
||||
case FLIPADST_FLIPADST:
|
||||
iadst4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
FLIPUD_PTR(dest, stride, 4);
|
||||
fliplr_4x4(in);
|
||||
break;
|
||||
case ADST_FLIPADST:
|
||||
iadst4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
fliplr_4x4(in);
|
||||
break;
|
||||
case FLIPADST_ADST:
|
||||
iadst4_sse2(in);
|
||||
iadst4_sse2(in);
|
||||
FLIPUD_PTR(dest, stride, 4);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
@ -875,22 +952,50 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
|
||||
in[7] = _mm_load_si128((const __m128i *)(input + 8 * 7));
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
case DCT_DCT:
|
||||
idct8_sse2(in);
|
||||
idct8_sse2(in);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
idct8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
iadst8_sse2(in);
|
||||
idct8_sse2(in);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
iadst8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
idct8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
FLIPUD_PTR(dest, stride, 8);
|
||||
break;
|
||||
case DCT_FLIPADST:
|
||||
iadst8_sse2(in);
|
||||
idct8_sse2(in);
|
||||
fliplr_8x8(in);
|
||||
break;
|
||||
case FLIPADST_FLIPADST:
|
||||
iadst8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
FLIPUD_PTR(dest, stride, 8);
|
||||
fliplr_8x8(in);
|
||||
break;
|
||||
case ADST_FLIPADST:
|
||||
iadst8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
fliplr_8x8(in);
|
||||
break;
|
||||
case FLIPADST_ADST:
|
||||
iadst8_sse2(in);
|
||||
iadst8_sse2(in);
|
||||
FLIPUD_PTR(dest, stride, 8);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
@ -2331,29 +2436,59 @@ static void iadst16_sse2(__m128i *in0, __m128i *in1) {
|
||||
|
||||
void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
|
||||
int tx_type) {
|
||||
__m128i in0[16], in1[16];
|
||||
__m128i in[32];
|
||||
__m128i *in0 = &in[0];
|
||||
__m128i *in1 = &in[16];
|
||||
|
||||
load_buffer_8x16(input, in0);
|
||||
input += 8;
|
||||
load_buffer_8x16(input, in1);
|
||||
|
||||
switch (tx_type) {
|
||||
case 0: // DCT_DCT
|
||||
case DCT_DCT:
|
||||
idct16_sse2(in0, in1);
|
||||
idct16_sse2(in0, in1);
|
||||
break;
|
||||
case 1: // ADST_DCT
|
||||
case ADST_DCT:
|
||||
idct16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
break;
|
||||
case 2: // DCT_ADST
|
||||
case DCT_ADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
idct16_sse2(in0, in1);
|
||||
break;
|
||||
case 3: // ADST_ADST
|
||||
case ADST_ADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case FLIPADST_DCT:
|
||||
idct16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
FLIPUD_PTR(dest, stride, 16);
|
||||
break;
|
||||
case DCT_FLIPADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
idct16_sse2(in0, in1);
|
||||
FLIPLR_16x16(in0, in1);
|
||||
break;
|
||||
case FLIPADST_FLIPADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
FLIPUD_PTR(dest, stride, 16);
|
||||
FLIPLR_16x16(in0, in1);
|
||||
break;
|
||||
case ADST_FLIPADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
FLIPLR_16x16(in0, in1);
|
||||
break;
|
||||
case FLIPADST_ADST:
|
||||
iadst16_sse2(in0, in1);
|
||||
iadst16_sse2(in0, in1);
|
||||
FLIPUD_PTR(dest, stride, 16);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user