vpx_fdct16x16_1_c/msa: fix accumulator overflow
tran_low_t is only signed 16-bits in non-high-bitdepth mode Change-Id: Ie02b5caf2658e8e71f995c17dd5ce666a4d64918
This commit is contained in:
parent
3735def667
commit
eb64ea3e89
@ -792,6 +792,67 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
|
||||
CompareInvReference(ref_txfm_, thresh_);
|
||||
}
|
||||
|
||||
class PartialTrans16x16Test
|
||||
: public ::testing::TestWithParam<
|
||||
std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > {
|
||||
public:
|
||||
virtual ~PartialTrans16x16Test() {}
|
||||
virtual void SetUp() {
|
||||
fwd_txfm_ = GET_PARAM(0);
|
||||
bit_depth_ = GET_PARAM(1);
|
||||
}
|
||||
|
||||
virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
||||
|
||||
protected:
|
||||
vpx_bit_depth_t bit_depth_;
|
||||
FdctFunc fwd_txfm_;
|
||||
};
|
||||
|
||||
TEST_P(PartialTrans16x16Test, Extremes) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const int16_t maxval =
|
||||
static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
|
||||
#else
|
||||
const int16_t maxval = 255;
|
||||
#endif
|
||||
const int minval = -maxval;
|
||||
DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
|
||||
|
||||
for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
|
||||
output[0] = 0;
|
||||
ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
|
||||
EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
|
||||
|
||||
for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
|
||||
output[0] = 0;
|
||||
ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
|
||||
EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
|
||||
}
|
||||
|
||||
TEST_P(PartialTrans16x16Test, Random) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const int16_t maxval =
|
||||
static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
|
||||
#else
|
||||
const int16_t maxval = 255;
|
||||
#endif
|
||||
DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
|
||||
DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
|
||||
int sum = 0;
|
||||
for (int i = 0; i < kNumCoeffs; ++i) {
|
||||
const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
|
||||
input[i] = val;
|
||||
sum += val;
|
||||
}
|
||||
output[0] = 0;
|
||||
ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
|
||||
EXPECT_EQ(sum >> 1, output[0]);
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
@ -824,6 +885,11 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, PartialTrans16x16Test,
|
||||
::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8),
|
||||
make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10),
|
||||
make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12)));
|
||||
#else
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16HT,
|
||||
@ -832,6 +898,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
|
||||
::testing::Values(make_tuple(&vpx_fdct16x16_1_c,
|
||||
VPX_BITS_8)));
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
@ -859,6 +928,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
|
||||
VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
|
||||
::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
|
||||
VPX_BITS_8)));
|
||||
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
@ -896,6 +968,9 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
|
||||
make_tuple(&idct16x16_12,
|
||||
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
|
||||
::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
|
||||
VPX_BITS_8)));
|
||||
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
@ -912,5 +987,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
|
||||
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
|
||||
VPX_BITS_8)));
|
||||
INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
|
||||
::testing::Values(make_tuple(&vpx_fdct16x16_1_msa,
|
||||
VPX_BITS_8)));
|
||||
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
} // namespace
|
||||
|
@ -365,12 +365,12 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
|
||||
|
||||
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
|
||||
int r, c;
|
||||
tran_low_t sum = 0;
|
||||
int sum = 0;
|
||||
for (r = 0; r < 16; ++r)
|
||||
for (c = 0; c < 16; ++c)
|
||||
sum += input[r * stride + c];
|
||||
|
||||
output[0] = sum >> 1;
|
||||
output[0] = (tran_low_t)(sum >> 1);
|
||||
}
|
||||
|
||||
static INLINE tran_high_t dct_32_round(tran_high_t input) {
|
||||
|
@ -237,11 +237,9 @@ void vpx_fdct16x16_msa(const int16_t *input, int16_t *output,
|
||||
}
|
||||
|
||||
void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
|
||||
out[1] = 0;
|
||||
|
||||
out[0] = LD_HADD(input, stride);
|
||||
out[0] += LD_HADD(input + 8, stride);
|
||||
out[0] += LD_HADD(input + 16 * 8, stride);
|
||||
out[0] += LD_HADD(input + 16 * 8 + 8, stride);
|
||||
out[0] >>= 1;
|
||||
int sum = LD_HADD(input, stride);
|
||||
sum += LD_HADD(input + 8, stride);
|
||||
sum += LD_HADD(input + 16 * 8, stride);
|
||||
sum += LD_HADD(input + 16 * 8 + 8, stride);
|
||||
out[0] = (int16_t)(sum >> 1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user