fix 16b overflow in SSE2
The 'accum' variable can be larger than 15b for large rescale values.

Asserts triggered:
  src/dsp/rescaler_sse2.c:249: RescalerExportRowExpand_SSE2: Assertion `v >= 0 && v <= 255' failed.
  src/dsp/rescaler_sse2.c:350: RescalerExportRowShrink_SSE2: Assertion `v >= 0 && v <= 255' failed.

-> Fall back to the C implementation in this case for now.

Change-Id: I7ea1cb72301cafc1459be403f6a6f4e3cbc89bb1
This commit is contained in:
parent
e577feb7c2
commit
c1cb86af5f
@ -36,7 +36,7 @@ static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
|
// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
|
||||||
static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
|
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
|
||||||
*out = _mm_unpacklo_epi8(A, zero);
|
*out = _mm_unpacklo_epi8(A, zero);
|
||||||
@ -50,13 +50,15 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
|||||||
int accum = x_add;
|
int accum = x_add;
|
||||||
__m128i cur_pixels;
|
__m128i cur_pixels;
|
||||||
|
|
||||||
|
// SSE2 implementation only works with 16b signed arithmetic at max.
|
||||||
|
if (wrk->src_width < 8 || accum >= (1 << 15)) {
|
||||||
|
WebPRescalerImportRowExpand_C(wrk, src);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
assert(!WebPRescalerInputDone(wrk));
|
assert(!WebPRescalerInputDone(wrk));
|
||||||
assert(wrk->x_expand);
|
assert(wrk->x_expand);
|
||||||
if (wrk->num_channels == 4) {
|
if (wrk->num_channels == 4) {
|
||||||
if (wrk->src_width < 2) {
|
|
||||||
WebPRescalerImportRowExpand_C(wrk, src);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
LoadTwoPixels_SSE2(src, &cur_pixels);
|
LoadTwoPixels_SSE2(src, &cur_pixels);
|
||||||
src += 4;
|
src += 4;
|
||||||
while (1) {
|
while (1) {
|
||||||
@ -75,11 +77,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
|||||||
} else {
|
} else {
|
||||||
int left;
|
int left;
|
||||||
const uint8_t* const src_limit = src + wrk->src_width - 8;
|
const uint8_t* const src_limit = src + wrk->src_width - 8;
|
||||||
if (wrk->src_width < 8) {
|
LoadEightPixels_SSE2(src, &cur_pixels);
|
||||||
WebPRescalerImportRowExpand_C(wrk, src);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
LoadHeightPixels_SSE2(src, &cur_pixels);
|
|
||||||
src += 7;
|
src += 7;
|
||||||
left = 7;
|
left = 7;
|
||||||
while (1) {
|
while (1) {
|
||||||
@ -94,7 +92,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
|||||||
if (--left) {
|
if (--left) {
|
||||||
cur_pixels = _mm_srli_si128(cur_pixels, 2);
|
cur_pixels = _mm_srli_si128(cur_pixels, 2);
|
||||||
} else if (src <= src_limit) {
|
} else if (src <= src_limit) {
|
||||||
LoadHeightPixels_SSE2(src, &cur_pixels);
|
LoadEightPixels_SSE2(src, &cur_pixels);
|
||||||
src += 7;
|
src += 7;
|
||||||
left = 7;
|
left = 7;
|
||||||
} else { // tail
|
} else { // tail
|
||||||
|
Loading…
Reference in New Issue
Block a user