From c1cb86af5f9cdea7afe2fff54d0e04b085d1e186 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Wed, 11 Apr 2018 15:17:14 +0200 Subject: [PATCH] fix 16b overflow in SSE2 the 'accum' variable can be larger than 15b for large rescale values. Assert triggered: src/dsp/rescaler_sse2.c:249: RescalerExportRowExpand_SSE2: Assertion `v >= 0 && v <= 255' failed. src/dsp/rescaler_sse2.c:350: RescalerExportRowShrink_SSE2: Assertion `v >= 0 && v <= 255' failed. -> fall back to C implementation in this case for now Change-Id: I7ea1cb72301cafc1459be403f6a6f4e3cbc89bb1 --- src/dsp/rescaler_sse2.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/dsp/rescaler_sse2.c b/src/dsp/rescaler_sse2.c index f93b204f..64c50dea 100644 --- a/src/dsp/rescaler_sse2.c +++ b/src/dsp/rescaler_sse2.c @@ -36,7 +36,7 @@ static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) { } // input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0 -static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) { +static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) { const __m128i zero = _mm_setzero_si128(); const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH *out = _mm_unpacklo_epi8(A, zero); @@ -50,13 +50,15 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, int accum = x_add; __m128i cur_pixels; + // SSE2 implementation only works with 16b signed arithmetic at max. + if (wrk->src_width < 8 || accum >= (1 << 15)) { + WebPRescalerImportRowExpand_C(wrk, src); + return; + } + assert(!WebPRescalerInputDone(wrk)); assert(wrk->x_expand); if (wrk->num_channels == 4) { - if (wrk->src_width < 2) { - WebPRescalerImportRowExpand_C(wrk, src); - return; - } LoadTwoPixels_SSE2(src, &cur_pixels); src += 4; while (1) { @@ -75,11 +77,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, } else { int left; const uint8_t* const src_limit = src + wrk->src_width - 8; - if (wrk->src_width < 8) { - WebPRescalerImportRowExpand_C(wrk, src); - return; - } - LoadHeightPixels_SSE2(src, &cur_pixels); + LoadEightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; while (1) { @@ -94,7 +92,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, if (--left) { cur_pixels = _mm_srli_si128(cur_pixels, 2); } else if (src <= src_limit) { - LoadHeightPixels_SSE2(src, &cur_pixels); + LoadEightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; } else { // tail