Merge "Extends ext-tx to support 32x32 masked transforms" into nextgenv2
This commit is contained in:
@@ -259,6 +259,73 @@ void idst16_c(const tran_low_t *input, tran_low_t *output) {
|
||||
output[15] = WRAPLOW(-step2[0] + step2[15], 8);
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
// For use in lieu of DST
|
||||
// 32-point inverse "half-center" transform (stand-in for an inverse DST).
//
// input[0..15] is scaled by sqrt(2) and run through idct16_c() to produce
// output[8..23]. input[16..23] and input[24..31] are multiplied by 4 and
// stored to output[0..7] and output[24..31] respectively.
//
// Safe to call with input == output: vp10_iht32x32_1024_add_c() runs its
// column pass in place, so every value of input[0..15] must be consumed
// before the first store to output.
static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). Done first so that the pass-through stores
  // below cannot clobber input[0..7] when the buffers alias.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Pass-through samples. output[0..7] reads input[16..23], which is still
  // intact here because idct16_c() has not yet written output[8..23].
  for (i = 0; i < 8; ++i) {
    output[i] = input[16 + i] * 4;
    output[24 + i] = input[24 + i] * 4;
  }
  idct16_c(inputhalf, output + 8);
  // Note overall scaling factor is 4 times orthogonal
}
|
||||
|
||||
// 32-point inverse "half-right" transform (stand-in for an inverse ADST).
//
// input[0..15] is scaled by sqrt(2) and run through idct16_c() to produce
// output[16..31]. input[16..31] is multiplied by 4 and stored to
// output[0..15].
//
// Safe to call with input == output: vp10_iht32x32_1024_add_c() runs its
// column pass in place, so input[0..15] must be consumed before
// output[0..15] is written.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). Done first so that the pass-through stores
  // below cannot clobber input[0..15] when the buffers alias.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Pass-through samples. input[16..31] is still intact here because
  // idct16_c() has not yet written output[16..31].
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// High-bitdepth 32-point inverse "half-center" transform (stand-in for an
// inverse DST).
//
// input[0..15] is scaled by sqrt(2) and run through vpx_highbd_idct16_c()
// to produce output[8..23]. input[16..23] and input[24..31] are multiplied
// by 4 and stored to output[0..7] and output[24..31] respectively. bd is
// forwarded to the rounding helper and to the 16-point transform.
//
// Safe to call with input == output: vp10_highbd_iht32x32_1024_add_c() runs
// its column pass in place, so every value of input[0..15] must be consumed
// before the first store to output.
static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
                                   int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). Done first so that the pass-through stores
  // below cannot clobber input[0..7] when the buffers alias.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
        input[i] * Sqrt2, bd);
  }
  // Pass-through samples. output[0..7] reads input[16..23], which is still
  // intact here because the 16-point transform has not yet written
  // output[8..23].
  for (i = 0; i < 8; ++i) {
    output[i] = input[16 + i] * 4;
    output[24 + i] = input[24 + i] * 4;
  }
  vpx_highbd_idct16_c(inputhalf, output + 8, bd);
  // Note overall scaling factor is 4 times orthogonal
}
|
||||
|
||||
// High-bitdepth 32-point inverse "half-right" transform (stand-in for an
// inverse ADST).
//
// input[0..15] is scaled by sqrt(2) and run through vpx_highbd_idct16_c()
// to produce output[16..31]. input[16..31] is multiplied by 4 and stored to
// output[0..15]. bd is forwarded to the rounding helper and to the 16-point
// transform.
//
// Safe to call with input == output: vp10_highbd_iht32x32_1024_add_c() runs
// its column pass in place, so input[0..15] must be consumed before
// output[0..15] is written.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2). Done first so that the pass-through stores
  // below cannot clobber input[0..15] when the buffers alias.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
        input[i] * Sqrt2, bd);
  }
  // Pass-through samples. input[16..31] is still intact here because the
  // 16-point transform has not yet written output[16..31].
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  vpx_highbd_idct16_c(inputhalf, output + 16, bd);
  // Note overall scaling factor is 4 times orthogonal
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
// Inverse identity transform and add.
|
||||
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int bs) {
|
||||
@@ -808,6 +875,67 @@ void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int tx_type) {
|
||||
static const transform_2d IHT_32[] = {
|
||||
{ idct32_c, idct32_c }, // DCT_DCT = 0,
|
||||
{ ihalfright32_c, idct32_c }, // ADST_DCT = 1,
|
||||
{ idct32_c, ihalfright32_c }, // DCT_ADST = 2,
|
||||
{ ihalfright32_c, ihalfright32_c }, // ADST_ADST = 3,
|
||||
{ ihalfright32_c, idct32_c }, // FLIPADST_DCT = 4,
|
||||
{ idct32_c, ihalfright32_c }, // DCT_FLIPADST = 5,
|
||||
{ ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST = 6,
|
||||
{ ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST = 7,
|
||||
{ ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST = 8,
|
||||
{ ihalfcenter32_c, idct32_c }, // DST_DCT = 9,
|
||||
{ idct32_c, ihalfcenter32_c }, // DCT_DST = 10,
|
||||
{ ihalfcenter32_c, ihalfright32_c }, // DST_ADST = 11,
|
||||
{ ihalfright32_c, ihalfcenter32_c }, // ADST_DST = 12,
|
||||
{ ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
|
||||
{ ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
|
||||
{ ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
|
||||
};
|
||||
|
||||
int i, j;
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[32][32];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 32;
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 32; ++i) {
|
||||
IHT_32[tx_type].rows(input, out[i]);
|
||||
input += 32;
|
||||
}
|
||||
|
||||
// transpose
|
||||
for (i = 1 ; i < 32; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 32; ++i) {
|
||||
IHT_32[tx_type].cols(out[i], out[i]);
|
||||
}
|
||||
|
||||
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32);
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 32; ++i) {
|
||||
for (j = 0; j < 32; ++j) {
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
// idct
|
||||
void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob) {
|
||||
@@ -998,15 +1126,27 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
|
||||
vp10_idct32x32_add(input, dest, stride, eob);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case ADST_DCT:
|
||||
case DCT_ADST:
|
||||
case ADST_ADST:
|
||||
case FLIPADST_DCT:
|
||||
case DCT_FLIPADST:
|
||||
case FLIPADST_FLIPADST:
|
||||
case ADST_FLIPADST:
|
||||
case FLIPADST_ADST:
|
||||
case DST_DST:
|
||||
case DST_DCT:
|
||||
case DCT_DST:
|
||||
case DST_ADST:
|
||||
case ADST_DST:
|
||||
case FLIPADST_DST:
|
||||
case DST_FLIPADST:
|
||||
vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
|
||||
break;
|
||||
case IDTX:
|
||||
inv_idtx_add_c(input, dest, stride, 32);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
case ADST_DCT:
|
||||
case DCT_ADST:
|
||||
case ADST_ADST:
|
||||
assert(0);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
@@ -1212,6 +1352,70 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
int stride, int tx_type, int bd) {
|
||||
static const highbd_transform_2d HIGH_IHT_32[] = {
|
||||
{ vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
|
||||
{ highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
|
||||
{ vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
|
||||
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
|
||||
{ highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
|
||||
{ vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
|
||||
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
|
||||
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
|
||||
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
|
||||
{ highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
|
||||
{ vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
|
||||
{ highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
|
||||
{ highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
|
||||
{ highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
|
||||
{ highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
|
||||
{ highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
|
||||
};
|
||||
|
||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
||||
|
||||
int i, j;
|
||||
tran_low_t tmp;
|
||||
tran_low_t out[32][32];
|
||||
tran_low_t *outp = &out[0][0];
|
||||
int outstride = 32;
|
||||
|
||||
// inverse transform row vectors
|
||||
for (i = 0; i < 32; ++i) {
|
||||
HIGH_IHT_32[tx_type].rows(input, out[i], bd);
|
||||
input += 32;
|
||||
}
|
||||
|
||||
// transpose
|
||||
for (i = 1 ; i < 32; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
tmp = out[i][j];
|
||||
out[i][j] = out[j][i];
|
||||
out[j][i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// inverse transform column vectors
|
||||
for (i = 0; i < 32; ++i) {
|
||||
HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
|
||||
}
|
||||
|
||||
maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32);
|
||||
|
||||
// Sum with the destination
|
||||
for (i = 0; i < 32; ++i) {
|
||||
for (j = 0; j < 32; ++j) {
|
||||
int d = i * stride + j;
|
||||
int s = j * outstride + i;
|
||||
dest[d] = highbd_clip_pixel_add(dest[d],
|
||||
ROUND_POWER_OF_TWO(outp[s], 6), bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
// idct
|
||||
void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob, int bd) {
|
||||
@@ -1409,15 +1613,27 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
|
||||
vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
|
||||
break;
|
||||
#if CONFIG_EXT_TX
|
||||
case ADST_DCT:
|
||||
case DCT_ADST:
|
||||
case ADST_ADST:
|
||||
case FLIPADST_DCT:
|
||||
case DCT_FLIPADST:
|
||||
case FLIPADST_FLIPADST:
|
||||
case ADST_FLIPADST:
|
||||
case FLIPADST_ADST:
|
||||
case DST_DST:
|
||||
case DST_DCT:
|
||||
case DCT_DST:
|
||||
case DST_ADST:
|
||||
case ADST_DST:
|
||||
case FLIPADST_DST:
|
||||
case DST_FLIPADST:
|
||||
vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
|
||||
break;
|
||||
case IDTX:
|
||||
highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
|
||||
break;
|
||||
#endif // CONFIG_EXT_TX
|
||||
case ADST_DCT:
|
||||
case DCT_ADST:
|
||||
case ADST_ADST:
|
||||
assert(0);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
|
@@ -404,6 +404,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_fht16x16 sse2/;
|
||||
|
||||
add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_fht32x32/;
|
||||
|
||||
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
|
||||
} else {
|
||||
@@ -416,6 +419,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_fht16x16 sse2 msa/;
|
||||
|
||||
add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_fht32x32/;
|
||||
|
||||
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
|
||||
}
|
||||
@@ -642,6 +648,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_highbd_fht16x16/;
|
||||
|
||||
add_proto qw/void vp10_highbd_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_highbd_fht32x32/;
|
||||
|
||||
add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp10_highbd_fwht4x4/;
|
||||
|
||||
|
@@ -14,7 +14,6 @@
|
||||
#include "./vp10_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "vp10/common/blockd.h"
|
||||
#include "vp10/common/idct.h"
|
||||
#include "vpx_dsp/fwd_txfm.h"
|
||||
@@ -538,7 +537,7 @@ static void fdct16(const tran_low_t *input, tran_low_t *output) {
|
||||
range_check(output, 16, 16);
|
||||
}
|
||||
|
||||
/* TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
|
||||
#if CONFIG_EXT_TX
|
||||
static void fdct32(const tran_low_t *input, tran_low_t *output) {
|
||||
tran_high_t temp;
|
||||
tran_low_t step[32];
|
||||
@@ -936,7 +935,7 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) {
|
||||
|
||||
range_check(output, 32, 18);
|
||||
}
|
||||
*/
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
static void fadst4(const tran_low_t *input, tran_low_t *output) {
|
||||
tran_high_t x0, x1, x2, x3;
|
||||
@@ -1213,6 +1212,37 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
|
||||
}
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
// For use in lieu of DST
|
||||
// 32-point forward "half-center" transform (stand-in for a forward DST).
//
// input[0..7] and input[24..31] are multiplied by 4 and stored to
// output[16..23] and output[24..31]. input[8..23] is scaled by sqrt(2) and
// run through fdct16() to produce output[0..15].
static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
  const tran_low_t *const center = input + 8;
  tran_low_t *const passthru = output + 16;
  tran_low_t scaled[16];
  int k;

  // Pass-through samples, scaled by 4.
  for (k = 0; k < 8; ++k) {
    passthru[k] = input[k] * 4;
    passthru[8 + k] = input[24 + k] * 4;
  }

  // Centre 16 samples, multiplied by sqrt(2).
  for (k = 0; k < 16; ++k) {
    scaled[k] = (tran_low_t)fdct_round_shift(center[k] * Sqrt2);
  }

  // Note overall scaling factor is 4 times orthogonal
  fdct16(scaled, output);
}
|
||||
|
||||
// For use in lieu of ADST
|
||||
// 32-point forward "half-right" transform (stand-in for a forward ADST).
//
// input[0..15] is multiplied by 4 and stored to output[16..31].
// input[16..31] is scaled by sqrt(2) and run through fdct16() to produce
// output[0..15].
static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
  const tran_low_t *const right = input + 16;
  tran_low_t *const passthru = output + 16;
  tran_low_t scaled[16];
  int k;

  // Pass-through samples, scaled by 4.
  for (k = 0; k < 16; ++k) {
    passthru[k] = input[k] * 4;
  }

  // Right 16 samples, multiplied by sqrt(2).
  for (k = 0; k < 16; ++k) {
    scaled[k] = (tran_low_t)fdct_round_shift(right[k] * Sqrt2);
  }

  // Note overall scaling factor is 4 times orthogonal
  fdct16(scaled, output);
}
|
||||
|
||||
static void copy_block(const int16_t *src, int src_stride, int l,
|
||||
int16_t *dest, int dest_stride) {
|
||||
int i;
|
||||
@@ -1375,6 +1405,27 @@ static const transform_2d FHT_16[] = {
|
||||
#endif // CONFIG_EXT_TX
|
||||
};
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
static const transform_2d FHT_32[] = {
|
||||
{ fdct32, fdct32 }, // DCT_DCT = 0,
|
||||
{ fhalfright32, fdct32 }, // ADST_DCT = 1,
|
||||
{ fdct32, fhalfright32 }, // DCT_ADST = 2,
|
||||
{ fhalfright32, fhalfright32 }, // ADST_ADST = 3,
|
||||
{ fhalfright32, fdct32 }, // FLIPADST_DCT = 4,
|
||||
{ fdct32, fhalfright32 }, // DCT_FLIPADST = 5,
|
||||
{ fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST = 6,
|
||||
{ fhalfright32, fhalfright32 }, // ADST_FLIPADST = 7,
|
||||
{ fhalfright32, fhalfright32 }, // FLIPADST_ADST = 8,
|
||||
{ fhalfcenter32, fdct32 }, // DST_DCT = 9,
|
||||
{ fdct32, fhalfcenter32 }, // DCT_DST = 10,
|
||||
{ fhalfcenter32, fhalfright32 }, // DST_ADST = 11,
|
||||
{ fhalfright32, fhalfcenter32 }, // ADST_DST = 12,
|
||||
{ fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
|
||||
{ fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
|
||||
{ fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
|
||||
};
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
||||
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
|
||||
int stride, int tx_type) {
|
||||
if (tx_type == DCT_DCT) {
|
||||
@@ -1671,3 +1722,46 @@ void vp10_highbd_fht16x16_c(const int16_t *input, tran_low_t *output,
|
||||
vp10_fht16x16_c(input, output, stride, tx_type);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if CONFIG_EXT_TX
|
||||
// 32x32 forward hybrid transform.
//
// DCT_DCT is delegated to vpx_fdct32x32_c(). For every other tx_type the
// input is first passed through maybe_flip_input(); then the column
// transform from FHT_32[tx_type] is applied to each column (samples
// pre-scaled by 4, results rounded down by 2 bits) and the row transform is
// applied to each row of the intermediate block (results rounded down by 2
// bits again) and written to output.
void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
                     int stride, int tx_type) {
  if (tx_type != DCT_DCT) {
    tran_low_t intermediate[1024];
    tran_low_t line_in[32], line_out[32];
    const transform_2d ht = FHT_32[tx_type];
    int16_t flipped_input[32 * 32];
    int idx, k;

    maybe_flip_input(&input, &stride, 32, flipped_input, tx_type);

    // Columns
    for (idx = 0; idx < 32; ++idx) {
      for (k = 0; k < 32; ++k) {
        line_in[k] = input[k * stride + idx] * 4;
      }
      ht.cols(line_in, line_out);
      for (k = 0; k < 32; ++k) {
        // Rounding offset is 2 for positive values and 1 otherwise.
        intermediate[k * 32 + idx] =
            (line_out[k] + 1 + (line_out[k] > 0)) >> 2;
      }
    }

    // Rows
    for (idx = 0; idx < 32; ++idx) {
      const tran_low_t *const src = intermediate + idx * 32;
      tran_low_t *const dst = output + idx * 32;
      for (k = 0; k < 32; ++k) {
        line_in[k] = src[k];
      }
      ht.rows(line_in, line_out);
      for (k = 0; k < 32; ++k) {
        // Rounding offset is 2 for negative values and 1 otherwise.
        dst[k] = (tran_low_t)((line_out[k] + 1 + (line_out[k] < 0)) >> 2);
      }
    }
  } else {
    vpx_fdct32x32_c(input, output, stride);
  }
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// High-bitdepth entry point for the 32x32 forward hybrid transform.
// It forwards every argument unchanged to vp10_fht32x32_c().
void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
                            int stride, int tx_type) {
  vp10_fht32x32_c(input, output, stride, tx_type);
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // CONFIG_EXT_TX
|
||||
|
@@ -2057,8 +2057,8 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
}
|
||||
}
|
||||
|
||||
static void highbd_idct32_c(const tran_low_t *input,
|
||||
tran_low_t *output, int bd) {
|
||||
void vpx_highbd_idct32_c(const tran_low_t *input,
|
||||
tran_low_t *output, int bd) {
|
||||
tran_low_t step1[32], step2[32];
|
||||
tran_high_t temp1, temp2;
|
||||
(void) bd;
|
||||
@@ -2447,7 +2447,7 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
||||
|
||||
if (zero_coeff[0] | zero_coeff[1])
|
||||
highbd_idct32_c(input, outptr, bd);
|
||||
vpx_highbd_idct32_c(input, outptr, bd);
|
||||
else
|
||||
memset(outptr, 0, sizeof(tran_low_t) * 32);
|
||||
input += 32;
|
||||
@@ -2458,7 +2458,7 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
for (i = 0; i < 32; ++i) {
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = out[j * 32 + i];
|
||||
highbd_idct32_c(temp_in, temp_out, bd);
|
||||
vpx_highbd_idct32_c(temp_in, temp_out, bd);
|
||||
for (j = 0; j < 32; ++j) {
|
||||
dest[j * stride + i] = highbd_clip_pixel_add(
|
||||
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
||||
@@ -2477,7 +2477,7 @@ void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
// Rows
|
||||
// Only upper-left 8x8 has non-zero coeff.
|
||||
for (i = 0; i < 8; ++i) {
|
||||
highbd_idct32_c(input, outptr, bd);
|
||||
vpx_highbd_idct32_c(input, outptr, bd);
|
||||
input += 32;
|
||||
outptr += 32;
|
||||
}
|
||||
@@ -2485,7 +2485,7 @@ void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
for (i = 0; i < 32; ++i) {
|
||||
for (j = 0; j < 32; ++j)
|
||||
temp_in[j] = out[j * 32 + i];
|
||||
highbd_idct32_c(temp_in, temp_out, bd);
|
||||
vpx_highbd_idct32_c(temp_in, temp_out, bd);
|
||||
for (j = 0; j < 32; ++j) {
|
||||
dest[j * stride + i] = highbd_clip_pixel_add(
|
||||
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
||||
|
@@ -100,6 +100,7 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output);
|
||||
void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vpx_highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
|
||||
void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
|
||||
|
@@ -57,10 +57,13 @@ static const tran_high_t cospi_29_64 = 2404;
|
||||
static const tran_high_t cospi_30_64 = 1606;
|
||||
static const tran_high_t cospi_31_64 = 804;
|
||||
|
||||
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
|
||||
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
|
||||
static const tran_high_t sinpi_1_9 = 5283;
|
||||
static const tran_high_t sinpi_2_9 = 9929;
|
||||
static const tran_high_t sinpi_3_9 = 13377;
|
||||
static const tran_high_t sinpi_4_9 = 15212;
|
||||
|
||||
// 16384 * sqrt(2)
|
||||
static const tran_high_t Sqrt2 = 23170;
|
||||
|
||||
#endif // VPX_DSP_TXFM_COMMON_H_
|
||||
|
Reference in New Issue
Block a user