Fix potential overflow issue in hadamard_16x16()

This commit fixes a potential integer overflow in the function
hadamard_16x16 and adds the corresponding dynamic range comments.

Change-Id: Iec22f3be345fb920ec79178e016378e2f65b20be
This commit is contained in:
Jingning Han 2015-06-12 10:53:43 -07:00
parent 4f52d49f1e
commit 176c291d9c
2 changed files with 15 additions and 12 deletions

View File

@ -88,26 +88,28 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
int16_t *coeff) { int16_t *coeff) {
int idx; int idx;
for (idx = 0; idx < 4; ++idx) { for (idx = 0; idx < 4; ++idx) {
// src_diff: 9 bit, dynamic range [-255, 255]
int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8; + (idx & 0x01) * 8;
vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
} }
// coeff: 15 bit, dynamic range [-16320, 16320]
for (idx = 0; idx < 64; ++idx) { for (idx = 0; idx < 64; ++idx) {
int16_t a0 = coeff[0]; int16_t a0 = coeff[0];
int16_t a1 = coeff[64]; int16_t a1 = coeff[64];
int16_t a2 = coeff[128]; int16_t a2 = coeff[128];
int16_t a3 = coeff[192]; int16_t a3 = coeff[192];
int16_t b0 = a0 + a1; int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640]
int16_t b1 = a0 - a1; int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range
int16_t b2 = a2 + a3; int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320]
int16_t b3 = a2 - a3; int16_t b3 = (a2 - a3) >> 1;
coeff[0] = (b0 + b2) >> 1; coeff[0] = b0 + b2; // 16 bit, [-32640, 32640]
coeff[64] = (b1 + b3) >> 1; coeff[64] = b1 + b3;
coeff[128] = (b0 - b2) >> 1; coeff[128] = b0 - b2;
coeff[192] = (b1 - b3) >> 1; coeff[192] = b1 - b3;
++coeff; ++coeff;
} }

View File

@ -264,17 +264,18 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
__m128i b2 = _mm_add_epi16(coeff2, coeff3); __m128i b2 = _mm_add_epi16(coeff2, coeff3);
__m128i b3 = _mm_sub_epi16(coeff2, coeff3); __m128i b3 = _mm_sub_epi16(coeff2, coeff3);
b0 = _mm_srai_epi16(b0, 1);
b1 = _mm_srai_epi16(b1, 1);
b2 = _mm_srai_epi16(b2, 1);
b3 = _mm_srai_epi16(b3, 1);
coeff0 = _mm_add_epi16(b0, b2); coeff0 = _mm_add_epi16(b0, b2);
coeff1 = _mm_add_epi16(b1, b3); coeff1 = _mm_add_epi16(b1, b3);
coeff0 = _mm_srai_epi16(coeff0, 1);
coeff1 = _mm_srai_epi16(coeff1, 1);
_mm_store_si128((__m128i *)coeff, coeff0); _mm_store_si128((__m128i *)coeff, coeff0);
_mm_store_si128((__m128i *)(coeff + 64), coeff1); _mm_store_si128((__m128i *)(coeff + 64), coeff1);
coeff2 = _mm_sub_epi16(b0, b2); coeff2 = _mm_sub_epi16(b0, b2);
coeff3 = _mm_sub_epi16(b1, b3); coeff3 = _mm_sub_epi16(b1, b3);
coeff2 = _mm_srai_epi16(coeff2, 1);
coeff3 = _mm_srai_epi16(coeff3, 1);
_mm_store_si128((__m128i *)(coeff + 128), coeff2); _mm_store_si128((__m128i *)(coeff + 128), coeff2);
_mm_store_si128((__m128i *)(coeff + 192), coeff3); _mm_store_si128((__m128i *)(coeff + 192), coeff3);