[UT] Add missing SSE2 4x4 IDCT test

IDCT input is defined in such a way that the intermediate values
cannot legally overflow an int16_t. Using random values as input
violates that constraint and causes such overflows, so the output
then depends on which type an implementation uses to hold its
intermediate results. Use a template for the test reference
implementation so that implementations with different
intermediate representations can each be tested.
This commit is contained in:
Sindre Aamås 2016-01-18 20:25:46 +01:00
parent 8764231784
commit b9adbcf37c

View File

@ -179,6 +179,7 @@ TEST (DecodeMbAuxTest, WelsDequantIHadamard2x2Dc) {
EXPECT_TRUE (ok);
}
#define FDEC_STRIDE 32
template<typename clip_t>
void WelsIDctT4Anchor (uint8_t* p_dst, int16_t dct[16]) {
int16_t tmp[16];
int32_t iStridex2 = (FDEC_STRIDE << 1);
@ -193,13 +194,13 @@ void WelsIDctT4Anchor (uint8_t* p_dst, int16_t dct[16]) {
}
for (i = 0; i < 4; i++) {
uiDst = p_dst[i];
p_dst[i] = WelsClip1 (uiDst + ((tmp[i] + tmp[4 + i] + tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
p_dst[i] = WelsClip1 (uiDst + (clip_t (tmp[i] + tmp[4 + i] + tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
uiDst = p_dst[i + FDEC_STRIDE];
p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + ((tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32) >> 6));
p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + (clip_t (tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32) >> 6));
uiDst = p_dst[i + iStridex2];
p_dst[i + iStridex2] = WelsClip1 (uiDst + ((tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32) >> 6));
p_dst[i + iStridex2] = WelsClip1 (uiDst + (clip_t (tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32) >> 6));
uiDst = p_dst[i + iStridex3];
p_dst[i + iStridex3] = WelsClip1 (uiDst + ((tmp[i] - tmp[4 + i] + tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
p_dst[i + iStridex3] = WelsClip1 (uiDst + (clip_t (tmp[i] - tmp[4 + i] + tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
}
}
TEST (DecodeMbAuxTest, WelsIDctT4Rec_c) {
@ -214,7 +215,7 @@ TEST (DecodeMbAuxTest, WelsIDctT4Rec_c) {
iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
}
}
WelsIDctT4Anchor (iRefDst, iRefDct);
WelsIDctT4Anchor<int32_t> (iRefDst, iRefDct);
WelsIDctT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
int ok = -1;
for (int i = 0; i < 4; i++) {
@ -257,13 +258,15 @@ TEST (DecodeMbAuxTest, WelsIDctT4Rec_mmx) {
}
}
#endif
// Reference ("anchor") reconstruction used by the four-block IDCT tests.
// Applies WelsIDctT4Anchor to four 4x4 sub-blocks of p_dst, located at
// offsets 0, 4, 4 * FDEC_STRIDE and 4 * FDEC_STRIDE + 4, feeding them
// dct[0] .. dct[3] respectively.
//
// clip_t is passed straight through as the template argument of
// WelsIDctT4Anchor.
//
// NOTE(review): this view lists both the calls without <clip_t> and the
// calls with <clip_t>; presumably the former are the pre-change lines of the
// diff and only the templated calls remain in the resulting file -- confirm
// against the actual source.
template<typename clip_t>
void WelsIDctT8Anchor (uint8_t* p_dst, int16_t dct[4][16]) {
WelsIDctT4Anchor (&p_dst[0], dct[0]);
WelsIDctT4Anchor (&p_dst[4], dct[1]);
WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
WelsIDctT4Anchor<clip_t> (&p_dst[0], dct[0]);
WelsIDctT4Anchor<clip_t> (&p_dst[4], dct[1]);
WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
}
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
template<typename clip_t>
void TestIDctFourT4Rec (void (*func) (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct)) {
int16_t iRefDct[4][16];
uint8_t iRefDst[16 * FDEC_STRIDE];
ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 64, 16);
@ -277,8 +280,8 @@ TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
for (int j = 0; j < 8; j++)
iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
WelsIDctT8Anchor (iRefDst, iRefDct);
WelsIDctFourT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
WelsIDctT8Anchor<clip_t> (iRefDst, iRefDct);
func (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
int ok = -1;
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
@ -290,6 +293,9 @@ TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
}
EXPECT_EQ (ok, -1);
}
// Checks the C implementation WelsIDctFourT4Rec_c through the shared
// TestIDctFourT4Rec helper. The reference anchor is instantiated with
// clip_t = int32_t, i.e. the anchor converts its final sums to int32_t
// before the >> 6 shift.
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
TestIDctFourT4Rec<int32_t> (WelsIDctFourT4Rec_c);
}
void WelsIDctRecI16x4DcAnchor (uint8_t* p_dst, int16_t dct[4]) {
for (int i = 0; i < 4; i++, p_dst += FDEC_STRIDE) {
p_dst[0] = WelsClip1 (p_dst[0] + ((dct[0] + 32) >> 6));
@ -345,6 +351,9 @@ TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_c) {
EXPECT_EQ (ok, -1);
}
#if defined(X86_ASM)
// Checks WelsIDctFourT4Rec_sse2 through the shared TestIDctFourT4Rec helper.
// The reference anchor is instantiated with clip_t = int16_t, so its final
// sums are converted to int16_t before the >> 6 shift; per the commit
// message, this selects the intermediate representation that matches the
// implementation under test when the random input overflows 16 bits.
// Only compiled when X86_ASM is defined (see the enclosing #if).
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_sse2) {
TestIDctFourT4Rec<int16_t> (WelsIDctFourT4Rec_sse2);
}
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_sse2) {
int32_t iCpuCores = 0;
uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);