From e9b54ab844fef7fce8d387d2e85349685a2c8674 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 30 Oct 2013 14:39:18 +0400
Subject: [PATCH] Fix cv::resize (area fast mode): the SSE2 version wrote
 outside the array boundary

---
 modules/imgproc/src/imgwarp.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 7ce02c38e..f15e0a08a 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -1347,7 +1347,7 @@ public:
             }
         }
         else if (cn == 3)
-            for ( ; dx <= w - 6; dx += 6, S0 += 12, S1 += 12, D += 6)
+            for ( ; dx <= w - 11; dx += 6, S0 += 12, S1 += 12, D += 6)
             {
                 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
                 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
@@ -1372,6 +1372,9 @@ public:
         else
         {
             CV_Assert(cn == 4);
+            int v[] = { 0, 0, -1, -1 };
+            __m128i mask = _mm_loadu_si128((const __m128i*)v);
+
             for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
             {
                 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
@@ -1385,14 +1388,15 @@ public:
                 __m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 8));
                 __m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 8));
                 s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
-                s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
-                _mm_storel_epi64((__m128i*)D, s0);
+                __m128i res0 = _mm_srli_epi16(s0, 2);
 
                 s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 8));
                 s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 8));
                 s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
-                s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero);
-                _mm_storel_epi64((__m128i*)(D+4), s0);
+                __m128i res1 = _mm_srli_epi16(s0, 2);
+                s0 = _mm_packus_epi16(_mm_or_si128(_mm_andnot_si128(mask, res0),
+                                                   _mm_and_si128(mask, _mm_slli_si128(res1, 8))), zero);
+                _mm_storel_epi64((__m128i*)(D), s0);
             }
         }
 
@@ -1445,7 +1449,7 @@ public:
             }
         }
         else if (cn == 3)
-            for ( ; dx <= w - 3; dx += 3, S0 += 6, S1 += 6, D += 3)
+            for ( ; dx <= w - 4; dx += 3, S0 += 6, S1 += 6, D += 3)
             {
                 __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
                 __m128i r1 = _mm_loadu_si128((const __m128i*)S1);