Clean CONVERT_TO_BYTEPTR/SHORTPTR in idct
BUG=webm:1388
Change-Id: Ida62c941f2b836d6c9e27b427a7d5008ab6dc112
This commit is contained in:
@@ -353,7 +353,7 @@ class Trans16x16TestBase {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -475,10 +475,10 @@ class Trans16x16TestBase {
|
|||||||
ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
|
inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
|
||||||
tx_type_);
|
tx_type_);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (bit_depth_ == VPX_BITS_8) {
|
if (bit_depth_ == VPX_BITS_8) {
|
||||||
@@ -530,8 +530,7 @@ class Trans16x16TestBase {
|
|||||||
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
|
||||||
RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
|
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -585,9 +584,9 @@ class Trans16x16TestBase {
|
|||||||
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
|
||||||
} else {
|
} else {
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
|
ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
|
inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,7 +275,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
|
|||||||
ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
|
ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
|
ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||||
|
|||||||
@@ -135,7 +135,7 @@ class Trans4x4TestBase {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -249,7 +249,7 @@ class Trans4x4TestBase {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ class FwdTrans8x8TestBase {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -340,7 +340,7 @@ class FwdTrans8x8TestBase {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -413,7 +413,7 @@ class FwdTrans8x8TestBase {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -497,9 +497,9 @@ class FwdTrans8x8TestBase {
|
|||||||
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
|
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
} else {
|
} else {
|
||||||
ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
|
ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
|
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
|
|||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
template <InvTxfmWithBdFunc fn>
|
template <InvTxfmWithBdFunc fn>
|
||||||
void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
|
void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
|
||||||
fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);
|
fn(in, out, stride, bd);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -213,7 +213,7 @@ void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
{ vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
|
{ vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
|
||||||
{ vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
|
{ vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
|
||||||
};
|
};
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
int i, j;
|
int i, j;
|
||||||
tran_low_t out[4 * 4];
|
tran_low_t out[4 * 4];
|
||||||
@@ -252,7 +252,7 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[8], temp_out[8];
|
tran_low_t temp_in[8], temp_out[8];
|
||||||
const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
|
const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// Inverse transform row vectors.
|
// Inverse transform row vectors.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
@@ -286,7 +286,7 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[16], temp_out[16];
|
tran_low_t temp_in[16], temp_out[16];
|
||||||
const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
|
const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// Rows
|
// Rows
|
||||||
for (i = 0; i < 16; ++i) {
|
for (i = 0; i < 16; ++i) {
|
||||||
|
|||||||
@@ -189,21 +189,22 @@ static void inverse_transform_block_inter(MACROBLOCKD *xd, int plane,
|
|||||||
assert(eob > 0);
|
assert(eob > 0);
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
|
uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
|
||||||
if (xd->lossless) {
|
if (xd->lossless) {
|
||||||
vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
} else {
|
} else {
|
||||||
switch (tx_size) {
|
switch (tx_size) {
|
||||||
case TX_4X4:
|
case TX_4X4:
|
||||||
vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_idct4x4_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_8X8:
|
case TX_8X8:
|
||||||
vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_idct8x8_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_16X16:
|
case TX_16X16:
|
||||||
vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_idct16x16_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_32X32:
|
case TX_32X32:
|
||||||
vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
default: assert(0 && "Invalid transform size");
|
default: assert(0 && "Invalid transform size");
|
||||||
}
|
}
|
||||||
@@ -256,21 +257,22 @@ static void inverse_transform_block_intra(MACROBLOCKD *xd, int plane,
|
|||||||
assert(eob > 0);
|
assert(eob > 0);
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
|
uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
|
||||||
if (xd->lossless) {
|
if (xd->lossless) {
|
||||||
vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
} else {
|
} else {
|
||||||
switch (tx_size) {
|
switch (tx_size) {
|
||||||
case TX_4X4:
|
case TX_4X4:
|
||||||
vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_8X8:
|
case TX_8X8:
|
||||||
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_16X16:
|
case TX_16X16:
|
||||||
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_32X32:
|
case TX_32X32:
|
||||||
vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
|
vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd);
|
||||||
break;
|
break;
|
||||||
default: assert(0 && "Invalid transform size");
|
default: assert(0 && "Invalid transform size");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -637,24 +637,25 @@ static void encode_block(int plane, int block, int row, int col,
|
|||||||
if (x->skip_encode || p->eobs[block] == 0) return;
|
if (x->skip_encode || p->eobs[block] == 0) return;
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
|
uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
|
||||||
switch (tx_size) {
|
switch (tx_size) {
|
||||||
case TX_32X32:
|
case TX_32X32:
|
||||||
vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
|
vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
||||||
xd->bd);
|
xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_16X16:
|
case TX_16X16:
|
||||||
vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
|
vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
||||||
xd->bd);
|
xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_8X8:
|
case TX_8X8:
|
||||||
vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
|
vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
||||||
xd->bd);
|
xd->bd);
|
||||||
break;
|
break;
|
||||||
case TX_4X4:
|
case TX_4X4:
|
||||||
// this is like vp9_short_idct4x4 but has a special case around eob<=1
|
// this is like vp9_short_idct4x4 but has a special case around eob<=1
|
||||||
// which is significant (not just an optimization) for the lossless
|
// which is significant (not just an optimization) for the lossless
|
||||||
// case.
|
// case.
|
||||||
x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
|
x->highbd_itxm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
|
||||||
xd->bd);
|
xd->bd);
|
||||||
break;
|
break;
|
||||||
default: assert(0 && "Invalid transform size");
|
default: assert(0 && "Invalid transform size");
|
||||||
@@ -699,7 +700,8 @@ static void encode_block_pass1(int plane, int block, int row, int col,
|
|||||||
if (p->eobs[block] > 0) {
|
if (p->eobs[block] > 0) {
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
|
x->highbd_itxm_add(dqcoeff, CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)),
|
||||||
|
pd->dst.stride, p->eobs[block], xd->bd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
@@ -799,6 +801,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
|
|||||||
|
|
||||||
#if CONFIG_VP9_HIGHBITDEPTH
|
#if CONFIG_VP9_HIGHBITDEPTH
|
||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
|
uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
|
||||||
switch (tx_size) {
|
switch (tx_size) {
|
||||||
case TX_32X32:
|
case TX_32X32:
|
||||||
if (!x->skip_recode) {
|
if (!x->skip_recode) {
|
||||||
@@ -814,7 +817,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
|
|||||||
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
||||||
}
|
}
|
||||||
if (!x->skip_encode && *eob) {
|
if (!x->skip_encode && *eob) {
|
||||||
vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
|
vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case TX_16X16:
|
case TX_16X16:
|
||||||
@@ -834,7 +837,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
|
|||||||
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
||||||
}
|
}
|
||||||
if (!x->skip_encode && *eob) {
|
if (!x->skip_encode && *eob) {
|
||||||
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob,
|
vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
|
||||||
xd->bd);
|
xd->bd);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -855,7 +858,7 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
|
|||||||
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
*a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
|
||||||
}
|
}
|
||||||
if (!x->skip_encode && *eob) {
|
if (!x->skip_encode && *eob) {
|
||||||
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
|
vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
|
||||||
xd->bd);
|
xd->bd);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -880,9 +883,10 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
|
|||||||
// this is like vp9_short_idct4x4 but has a special case around
|
// this is like vp9_short_idct4x4 but has a special case around
|
||||||
// eob<=1 which is significant (not just an optimization) for the
|
// eob<=1 which is significant (not just an optimization) for the
|
||||||
// lossless case.
|
// lossless case.
|
||||||
x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
|
x->highbd_itxm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
|
||||||
} else {
|
} else {
|
||||||
vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
|
vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
|
||||||
|
xd->bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -601,7 +601,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
|
|||||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||||
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
|
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
|
||||||
32, NULL, 0, NULL, 0, bs, bs, xd->bd);
|
32, NULL, 0, NULL, 0, bs, bs, xd->bd);
|
||||||
recon = CONVERT_TO_BYTEPTR(recon16);
|
recon = CAST_TO_BYTEPTR(recon16);
|
||||||
if (xd->lossless) {
|
if (xd->lossless) {
|
||||||
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
|
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
|
||||||
} else {
|
} else {
|
||||||
@@ -621,6 +621,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
|
|||||||
default: assert(0 && "Invalid transform size");
|
default: assert(0 && "Invalid transform size");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
recon = CONVERT_TO_BYTEPTR(recon16);
|
||||||
} else {
|
} else {
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
|
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
|
||||||
@@ -1004,6 +1005,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
|
|||||||
const int block = (row + idy) * 2 + (col + idx);
|
const int block = (row + idy) * 2 + (col + idx);
|
||||||
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
|
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
|
||||||
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
|
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
|
||||||
|
uint8_t *const dst16 = CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst));
|
||||||
int16_t *const src_diff =
|
int16_t *const src_diff =
|
||||||
vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
|
vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
|
||||||
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
|
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
|
||||||
@@ -1025,7 +1027,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
|
|||||||
tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
|
tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
|
||||||
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
||||||
goto next_highbd;
|
goto next_highbd;
|
||||||
vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst,
|
vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16,
|
||||||
dst_stride, p->eobs[block], xd->bd);
|
dst_stride, p->eobs[block], xd->bd);
|
||||||
} else {
|
} else {
|
||||||
int64_t unused;
|
int64_t unused;
|
||||||
@@ -1048,7 +1050,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
|
|||||||
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
||||||
goto next_highbd;
|
goto next_highbd;
|
||||||
vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
|
vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
|
||||||
dst, dst_stride, p->eobs[block], xd->bd);
|
dst16, dst_stride, p->eobs[block], xd->bd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1270,7 +1270,7 @@ void vpx_highbd_idct16x16_10_add_half1d_pass2(const int32_t *input,
|
|||||||
|
|
||||||
void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
int16_t row_idct_output[16 * 16];
|
int16_t row_idct_output[16 * 16];
|
||||||
@@ -1315,7 +1315,7 @@ void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
|
|
||||||
void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
int16_t row_idct_output[16 * 16];
|
int16_t row_idct_output[16 * 16];
|
||||||
@@ -1351,7 +1351,7 @@ void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
|
|
||||||
void vpx_highbd_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
int16_t row_idct_output[4 * 16];
|
int16_t row_idct_output[4 * 16];
|
||||||
@@ -1422,7 +1422,7 @@ void vpx_highbd_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
||||||
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
|
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
|
||||||
const int16x8_t dc = vdupq_n_s16(a1);
|
const int16x8_t dc = vdupq_n_s16(a1);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (a1 >= 0) {
|
if (a1 >= 0) {
|
||||||
|
|||||||
@@ -394,7 +394,7 @@ static INLINE void vpx_highbd_idct32_32_neon(const tran_low_t *input,
|
|||||||
int32_t pass2[32 * 32];
|
int32_t pass2[32 * 32];
|
||||||
int32_t *out;
|
int32_t *out;
|
||||||
int32x4x2_t q[16];
|
int32x4x2_t q[16];
|
||||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
|
uint16_t *dst = CAST_TO_SHORTPTR(dest);
|
||||||
|
|
||||||
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
|
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
|
||||||
idct32_pass_loop++, input = pass1, out = pass2) {
|
idct32_pass_loop++, input = pass1, out = pass2) {
|
||||||
|
|||||||
@@ -729,7 +729,7 @@ static void vpx_highbd_idct32_16_neon(const int32_t *const input,
|
|||||||
void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
int i;
|
int i;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
int16_t temp[32 * 16];
|
int16_t temp[32 * 16];
|
||||||
|
|||||||
@@ -597,7 +597,7 @@ static void vpx_highbd_idct32_8_neon(const int32_t *input, uint16_t *output,
|
|||||||
void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
int i;
|
int i;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
int16_t temp[32 * 8];
|
int16_t temp[32 * 8];
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ void vpx_highbd_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
||||||
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
|
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
|
||||||
const int16x8_t dc = vdupq_n_s16(a1);
|
const int16x8_t dc = vdupq_n_s16(a1);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (a1 >= 0) {
|
if (a1 >= 0) {
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
||||||
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4);
|
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4);
|
||||||
const int16x8_t dc = vdupq_n_s16(a1);
|
const int16x8_t dc = vdupq_n_s16(a1);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
|
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
|
||||||
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
|
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
|
||||||
@@ -140,7 +140,7 @@ void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
int32x4_t c1 = vld1q_s32(input + 4);
|
int32x4_t c1 = vld1q_s32(input + 4);
|
||||||
int32x4_t c2 = vld1q_s32(input + 8);
|
int32x4_t c2 = vld1q_s32(input + 8);
|
||||||
int32x4_t c3 = vld1q_s32(input + 12);
|
int32x4_t c3 = vld1q_s32(input + 12);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
int16x8_t a0, a1;
|
int16x8_t a0, a1;
|
||||||
|
|
||||||
if (bd == 8) {
|
if (bd == 8) {
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ void vpx_highbd_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest8,
|
|||||||
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(out0 * cospi_16_64), bd);
|
||||||
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
|
const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
|
||||||
const int16x8_t dc = vdupq_n_s16(a1);
|
const int16x8_t dc = vdupq_n_s16(a1);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
if (a1 >= 0) {
|
if (a1 >= 0) {
|
||||||
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
|
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
|
||||||
@@ -294,7 +294,7 @@ static INLINE void highbd_add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2,
|
|||||||
|
|
||||||
void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
int32x4_t a0 = vld1q_s32(input);
|
int32x4_t a0 = vld1q_s32(input);
|
||||||
int32x4_t a1 = vld1q_s32(input + 8);
|
int32x4_t a1 = vld1q_s32(input + 8);
|
||||||
int32x4_t a2 = vld1q_s32(input + 16);
|
int32x4_t a2 = vld1q_s32(input + 16);
|
||||||
@@ -555,7 +555,7 @@ static INLINE void idct8x8_64_half1d_bd12(
|
|||||||
|
|
||||||
void vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest8,
|
void vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest8,
|
||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
int32x4_t a0 = vld1q_s32(input);
|
int32x4_t a0 = vld1q_s32(input);
|
||||||
int32x4_t a1 = vld1q_s32(input + 4);
|
int32x4_t a1 = vld1q_s32(input + 4);
|
||||||
int32x4_t a2 = vld1q_s32(input + 8);
|
int32x4_t a2 = vld1q_s32(input + 8);
|
||||||
|
|||||||
@@ -517,7 +517,7 @@ void vpx_idct32_32_neon(const tran_low_t *input, uint8_t *dest,
|
|||||||
const int16_t *input_pass2 = pass1; // input of pass2 is the result of pass1
|
const int16_t *input_pass2 = pass1; // input of pass2 is the result of pass1
|
||||||
int16_t *out;
|
int16_t *out;
|
||||||
int16x8_t q[16];
|
int16x8_t q[16];
|
||||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
|
uint16_t *dst = CAST_TO_SHORTPTR(dest);
|
||||||
|
|
||||||
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
|
for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2;
|
||||||
idct32_pass_loop++, out = pass2) {
|
idct32_pass_loop++, out = pass2) {
|
||||||
|
|||||||
@@ -1299,7 +1299,7 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_high_t a1, b1, c1, d1, e1;
|
tran_high_t a1, b1, c1, d1, e1;
|
||||||
const tran_low_t *ip = input;
|
const tran_low_t *ip = input;
|
||||||
tran_low_t *op = output;
|
tran_low_t *op = output;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
||||||
@@ -1355,7 +1355,7 @@ void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
|
|||||||
tran_low_t tmp[4];
|
tran_low_t tmp[4];
|
||||||
const tran_low_t *ip = in;
|
const tran_low_t *ip = in;
|
||||||
tran_low_t *op = tmp;
|
tran_low_t *op = tmp;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
(void)bd;
|
(void)bd;
|
||||||
|
|
||||||
a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
||||||
@@ -1458,7 +1458,7 @@ void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[4 * 4];
|
tran_low_t out[4 * 4];
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[4], temp_out[4];
|
tran_low_t temp_in[4], temp_out[4];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// Rows
|
// Rows
|
||||||
for (i = 0; i < 4; ++i) {
|
for (i = 0; i < 4; ++i) {
|
||||||
@@ -1484,7 +1484,7 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_high_t a1;
|
tran_high_t a1;
|
||||||
tran_low_t out =
|
tran_low_t out =
|
||||||
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
||||||
a1 = ROUND_POWER_OF_TWO(out, 4);
|
a1 = ROUND_POWER_OF_TWO(out, 4);
|
||||||
@@ -1642,7 +1642,7 @@ void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[8 * 8];
|
tran_low_t out[8 * 8];
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[8], temp_out[8];
|
tran_low_t temp_in[8], temp_out[8];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// First transform rows
|
// First transform rows
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
@@ -1668,7 +1668,7 @@ void vpx_highbd_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[8 * 8] = { 0 };
|
tran_low_t out[8 * 8] = { 0 };
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[8], temp_out[8];
|
tran_low_t temp_in[8], temp_out[8];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// First transform rows
|
// First transform rows
|
||||||
// Only first 4 row has non-zero coefs
|
// Only first 4 row has non-zero coefs
|
||||||
@@ -1695,7 +1695,7 @@ void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_high_t a1;
|
tran_high_t a1;
|
||||||
tran_low_t out =
|
tran_low_t out =
|
||||||
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
||||||
a1 = ROUND_POWER_OF_TWO(out, 5);
|
a1 = ROUND_POWER_OF_TWO(out, 5);
|
||||||
@@ -2062,7 +2062,7 @@ void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[16 * 16];
|
tran_low_t out[16 * 16];
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[16], temp_out[16];
|
tran_low_t temp_in[16], temp_out[16];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// First transform rows
|
// First transform rows
|
||||||
for (i = 0; i < 16; ++i) {
|
for (i = 0; i < 16; ++i) {
|
||||||
@@ -2088,7 +2088,7 @@ void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[16 * 16] = { 0 };
|
tran_low_t out[16 * 16] = { 0 };
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[16], temp_out[16];
|
tran_low_t temp_in[16], temp_out[16];
|
||||||
uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *const dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// First transform rows. Since all non-zero dct coefficients are in
|
// First transform rows. Since all non-zero dct coefficients are in
|
||||||
// upper-left 8x8 area, we only need to calculate first 8 rows here.
|
// upper-left 8x8 area, we only need to calculate first 8 rows here.
|
||||||
@@ -2117,7 +2117,7 @@ void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[16 * 16] = { 0 };
|
tran_low_t out[16 * 16] = { 0 };
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[16], temp_out[16];
|
tran_low_t temp_in[16], temp_out[16];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// First transform rows. Since all non-zero dct coefficients are in
|
// First transform rows. Since all non-zero dct coefficients are in
|
||||||
// upper-left 4x4 area, we only need to calculate first 4 rows here.
|
// upper-left 4x4 area, we only need to calculate first 4 rows here.
|
||||||
@@ -2144,7 +2144,7 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_high_t a1;
|
tran_high_t a1;
|
||||||
tran_low_t out =
|
tran_low_t out =
|
||||||
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
||||||
a1 = ROUND_POWER_OF_TWO(out, 6);
|
a1 = ROUND_POWER_OF_TWO(out, 6);
|
||||||
@@ -2537,7 +2537,7 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[32 * 32];
|
tran_low_t out[32 * 32];
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[32], temp_out[32];
|
tran_low_t temp_in[32], temp_out[32];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// Rows
|
// Rows
|
||||||
for (i = 0; i < 32; ++i) {
|
for (i = 0; i < 32; ++i) {
|
||||||
@@ -2575,7 +2575,7 @@ void vpx_highbd_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[32 * 32] = { 0 };
|
tran_low_t out[32 * 32] = { 0 };
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[32], temp_out[32];
|
tran_low_t temp_in[32], temp_out[32];
|
||||||
uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *const dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// Rows
|
// Rows
|
||||||
// Only upper-left 16x16 has non-zero coeff
|
// Only upper-left 16x16 has non-zero coeff
|
||||||
@@ -2604,7 +2604,7 @@ void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
tran_low_t out[32 * 32] = { 0 };
|
tran_low_t out[32 * 32] = { 0 };
|
||||||
tran_low_t *outptr = out;
|
tran_low_t *outptr = out;
|
||||||
tran_low_t temp_in[32], temp_out[32];
|
tran_low_t temp_in[32], temp_out[32];
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
|
|
||||||
// Rows
|
// Rows
|
||||||
// Only upper-left 8x8 has non-zero coeff
|
// Only upper-left 8x8 has non-zero coeff
|
||||||
@@ -2629,7 +2629,7 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
|||||||
int stride, int bd) {
|
int stride, int bd) {
|
||||||
int i, j;
|
int i, j;
|
||||||
int a1;
|
int a1;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
tran_low_t out =
|
tran_low_t out =
|
||||||
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
||||||
|
|
||||||
|
|||||||
@@ -3373,7 +3373,7 @@ void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
|
|||||||
__m128i sign_bits[2];
|
__m128i sign_bits[2];
|
||||||
__m128i temp_mm, min_input, max_input;
|
__m128i temp_mm, min_input, max_input;
|
||||||
int test;
|
int test;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
int optimised_cols = 0;
|
int optimised_cols = 0;
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i eight = _mm_set1_epi16(8);
|
const __m128i eight = _mm_set1_epi16(8);
|
||||||
@@ -3486,7 +3486,7 @@ void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
|
|||||||
int i, j, test;
|
int i, j, test;
|
||||||
__m128i inptr[8];
|
__m128i inptr[8];
|
||||||
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i sixteen = _mm_set1_epi16(16);
|
const __m128i sixteen = _mm_set1_epi16(16);
|
||||||
const __m128i max = _mm_set1_epi16(6201);
|
const __m128i max = _mm_set1_epi16(6201);
|
||||||
@@ -3586,7 +3586,7 @@ void vpx_highbd_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest8,
|
|||||||
int i, j, test;
|
int i, j, test;
|
||||||
__m128i inptr[8];
|
__m128i inptr[8];
|
||||||
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i sixteen = _mm_set1_epi16(16);
|
const __m128i sixteen = _mm_set1_epi16(16);
|
||||||
const __m128i max = _mm_set1_epi16(6201);
|
const __m128i max = _mm_set1_epi16(6201);
|
||||||
@@ -3689,7 +3689,7 @@ void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
|
|||||||
int i, j, test;
|
int i, j, test;
|
||||||
__m128i inptr[32];
|
__m128i inptr[32];
|
||||||
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i rounding = _mm_set1_epi16(32);
|
const __m128i rounding = _mm_set1_epi16(32);
|
||||||
const __m128i max = _mm_set1_epi16(3155);
|
const __m128i max = _mm_set1_epi16(3155);
|
||||||
@@ -3802,7 +3802,7 @@ void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
|
|||||||
int i, j, test;
|
int i, j, test;
|
||||||
__m128i inptr[32];
|
__m128i inptr[32];
|
||||||
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
__m128i min_input, max_input, temp1, temp2, sign_bits;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
const __m128i zero = _mm_set1_epi16(0);
|
const __m128i zero = _mm_set1_epi16(0);
|
||||||
const __m128i rounding = _mm_set1_epi16(32);
|
const __m128i rounding = _mm_set1_epi16(32);
|
||||||
const __m128i max = _mm_set1_epi16(3155);
|
const __m128i max = _mm_set1_epi16(3155);
|
||||||
@@ -3920,7 +3920,7 @@ void vpx_highbd_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest8,
|
|||||||
const __m128i one = _mm_set1_epi16(1);
|
const __m128i one = _mm_set1_epi16(1);
|
||||||
const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
|
const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
|
||||||
int a, i, j;
|
int a, i, j;
|
||||||
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
uint16_t *dest = CAST_TO_SHORTPTR(dest8);
|
||||||
tran_low_t out;
|
tran_low_t out;
|
||||||
|
|
||||||
out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
out = HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
||||||
|
|||||||
Reference in New Issue
Block a user