diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 193cb43a6..9232f49bc 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -198,10 +198,8 @@ namespace cv
             if (map1.empty())
                 map1.swap(map2);
 
-            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
-                      /*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
-            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
-                                                                    (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
+            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
+            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ) ||
                       (map1.type() == CV_32FC2 && !map2.data) ||
                       (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
             CV_Assert(!map2.data || map2.size() == map1.size());
@@ -231,8 +229,8 @@ namespace cv
                 CV_Error(CV_StsBadArg, "Unsupported map types");
 
             int ocn = dst.oclchannels();
-            size_t localThreads[3] = { 16, 16, 1};
-            size_t globalThreads[3] = { dst.cols, dst.rows, 1};
+            size_t localThreads[3] = { 256, 1, 1 };
+            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
 
             Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
             std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
index b623091ed..340e741cc 100644
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -243,6 +243,60 @@ __kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * ds
 
 #elif INTER_LINEAR
 
+__kernel void remap_16SC2_16UC1(__global T const * restrict src, __global T * dst,
+        __global short2 * restrict map1, __global ushort * restrict map2,
+        int src_offset, int dst_offset, int map1_offset, int map2_offset,
+        int src_step, int dst_step, int map1_step, int map2_step,
+        int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (x < dst_cols && y < dst_rows)
+    {
+        int dstIdx = mad24(y, dst_step, x + dst_offset);
+        int map1Idx = mad24(y, map1_step, x + map1_offset);
+        int map2Idx = mad24(y, map2_step, x + map2_offset);
+
+        int2 map_dataA = convert_int2(map1[map1Idx]);
+        int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
+        int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
+        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
+
+        ushort map2Value = (ushort)(map2[map2Idx] & (INTER_TAB_SIZE2 - 1));
+        WT2 u = (WT2)(map2Value & (INTER_TAB_SIZE - 1), map2Value >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);
+
+        WT scalar = convertToWT(nVal);
+        WT a = scalar, b = scalar, c = scalar, d = scalar;
+
+        if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
+            a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataA, a);
+
+        if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
+            b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataB, b);
+
+        if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
+            c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataC, c);
+
+        if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
+            d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataD, d);
+
+        WT dst_data = a * (1 - u.x) * (1 - u.y) +
+                      b * (u.x)     * (1 - u.y) +
+                      c * (1 - u.x) * (u.y) +
+                      d * (u.x)     * (u.y);
+        dst[dstIdx] = convertToT(dst_data);
+    }
+}
+
 __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         __global float * map1, __global float * map2,
         int src_offset, int dst_offset, int map1_offset, int map2_offset,
@@ -263,7 +317,7 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         int2 map_dataA = convert_int2_sat_rtn(map_data);
         int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
         int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
-        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
+        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
 
         float2 _u = map_data - convert_float2(map_dataA);
         WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
@@ -290,10 +344,10 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         else
             EXTRAPOLATE(map_dataD, d);
 
-        WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
-                      b * (WT)(u.x)     * (WT)(1 - u.y) +
-                      c * (WT)(1 - u.x) * (WT)(u.y) +
-                      d * (WT)(u.x)     * (WT)(u.y);
+        WT dst_data = a * (1 - u.x) * (1 - u.y) +
+                      b * (u.x)     * (1 - u.y) +
+                      c * (1 - u.x) * (u.y) +
+                      d * (u.x)     * (u.y);
         dst[dstIdx] = convertToT(dst_data);
     }
 }
@@ -343,10 +397,10 @@ __kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
         else
             EXTRAPOLATE(map_dataD, d);
 
-        WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
-                      b * (WT)(u.x)     * (WT)(1 - u.y) +
-                      c * (WT)(1 - u.x) * (WT)(u.y) +
-                      d * (WT)(u.x)     * (WT)(u.y);
+        WT dst_data = a * (1 - u.x) * (1 - u.y) +
+                      b * (u.x)     * (1 - u.y) +
+                      c * (1 - u.x) * (u.y) +
+                      d * (u.x)     * (u.y);
         dst[dstIdx] = convertToT(dst_data);
     }
 }
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index 42415d099..adb3f20cd 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -313,7 +313,12 @@ PARAM_TEST_CASE(Remap, MatDepth, Channels, pair<MatType, MatType>, Border, bool)
 
         Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
         if (map2Type != noType)
-            randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, -mapMaxValue, mapMaxValue);
+        {
+            int mapMinValue = -mapMaxValue;
+            if (map2Type == CV_16UC1 || map2Type == CV_16SC1)
+                mapMinValue = 0, mapMaxValue = INTER_TAB_SIZE2;
+            randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, mapMinValue, mapMaxValue);
+        }
 
         generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder);
         generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder);
@@ -452,6 +457,7 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
                             Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                             Values(1, 2, 3, 4),
                             Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
+                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
                                    pair<MatType, MatType>((MatType)CV_32FC2, noType)),
                             Values((Border)BORDER_CONSTANT,
                                    (Border)BORDER_REPLICATE,