From 4248f8221146a929b1ac98f24d58c699e8dafa03 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 18:40:53 +0400
Subject: [PATCH 1/3] added ROI support to ocl::buildWarp*Maps functions

---
 modules/ocl/src/build_warps.cpp       | 144 ++++++++++++++-----------
 modules/ocl/src/opencl/build_warps.cl | 149 +++++++++++---------------
 2 files changed, 141 insertions(+), 152 deletions(-)

diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp
index dc9ab66db..40c082b55 100644
--- a/modules/ocl/src/build_warps.cpp
+++ b/modules/ocl/src/build_warps.cpp
@@ -53,7 +53,7 @@ using namespace cv::ocl;
 // buildWarpPlaneMaps
 
 void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
-                                 float scale, oclMat &map_x, oclMat &map_y)
+                                 float scale, oclMat &xmap, oclMat &ymap)
 {
     CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
     CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
@@ -68,37 +68,40 @@ void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K,
 
     oclMat KRT_oclMat(KRT_mat);
     // transfer K_Rinv and T into a single cl_mem
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
+    xmap.create(dst_roi.size(), CV_32F);
+    ymap.create(dst_roi.size(), CV_32F);
 
     int tl_u = dst_roi.tl().x;
     int tl_v = dst_roi.tl().y;
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpPlaneMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_mat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
     args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
 
-    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpCylyndricalMaps
 
 void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
-                                       oclMat &map_x, oclMat &map_y)
+                                       oclMat &xmap, oclMat &ymap)
 {
     CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
     CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
@@ -108,36 +111,40 @@ void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Ma
 
     oclMat KR_oclMat(K_Rinv.reshape(1, 1));
 
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
+    xmap.create(dst_roi.size(), CV_32F);
+    ymap.create(dst_roi.size(), CV_32F);
 
     int tl_u = dst_roi.tl().x;
     int tl_v = dst_roi.tl().y;
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpCylindricalMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
     args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
 
-    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpSphericalMaps
+
 void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
-                                     oclMat &map_x, oclMat &map_y)
+                                     oclMat &xmap, oclMat &ymap)
 {
     CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
     CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
@@ -147,37 +154,41 @@ void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat
 
     oclMat KR_oclMat(K_Rinv.reshape(1, 1));
     // transfer K_Rinv, R_Kinv into a single cl_mem
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
+    xmap.create(dst_roi.size(), CV_32F);
+    ymap.create(dst_roi.size(), CV_32F);
 
     int tl_u = dst_roi.tl().x;
     int tl_v = dst_roi.tl().y;
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpSphericalMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
     args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
 
-    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1);
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpAffineMaps
 
 void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
 {
-
     CV_Assert(M.rows == 2 && M.cols == 3);
+    CV_Assert(dsize.area());
 
     xmap.create(dsize, CV_32FC1);
     ymap.create(dsize, CV_32FC1);
@@ -194,29 +205,34 @@ void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat
         iM.convertTo(coeffsMat, coeffsMat.type());
     }
 
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
+
     oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpAffineMaps";
     vector< pair<size_t, const void *> > args;
-
     args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
 
-    size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1);
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpPerspectiveMaps
+
 void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
 {
-
     CV_Assert(M.rows == 3 && M.cols == 3);
+    CV_Assert(dsize.area() > 0);
 
     xmap.create(dsize, CV_32FC1);
     ymap.create(dsize, CV_32FC1);
@@ -235,19 +251,21 @@ void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, o
 
     oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpPerspectiveMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
+    vector< pair<size_t, const void *> > args;
     args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
 
-    size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1);
 }
diff --git a/modules/ocl/src/opencl/build_warps.cl b/modules/ocl/src/opencl/build_warps.cl
index 4402e8c38..bd5e002b5 100644
--- a/modules/ocl/src/opencl/build_warps.cl
+++ b/modules/ocl/src/opencl/build_warps.cl
@@ -43,31 +43,25 @@
 //
 //M*/
 
-__kernel
-    void buildWarpPlaneMaps
-    (
-    __global float * map_x,
-    __global float * map_y,
-    __constant float * KRT,
-    int tl_u,
-    int tl_v,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y,
-    float scale
-    )
+__kernel void buildWarpPlaneMaps(__global float * xmap, __global float * ymap,
+                                 __constant float * KRT,
+                                 int tl_u, int tl_v,
+                                 int cols, int rows,
+                                 int xmap_step, int ymap_step,
+                                 int xmap_offset, int ymap_offset,
+                                 float scale)
 {
     int du = get_global_id(0);
     int dv = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     __constant float * ck_rinv = KRT;
     __constant float * ct      = KRT + 9;
 
     if (du < cols && dv < rows)
     {
+        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
+        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
+
         float u = tl_u + du;
         float v = tl_v + dv;
         float x, y;
@@ -83,33 +77,27 @@ __kernel
         x /= z;
         y /= z;
 
-        map_x[dv * step_x + du] = x;
-        map_y[dv * step_y + du] = y;
+        xmap[xmap_index] = x;
+        ymap[ymap_index] = y;
     }
 }
 
-__kernel
-    void buildWarpCylindricalMaps
-    (
-    __global float * map_x,
-    __global float * map_y,
-    __constant float * ck_rinv,
-    int tl_u,
-    int tl_v,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y,
-    float scale
-    )
+__kernel void buildWarpCylindricalMaps(__global float * xmap, __global float * ymap,
+                                       __constant float * ck_rinv,
+                                       int tl_u, int tl_v,
+                                       int cols, int rows,
+                                       int xmap_step, int ymap_step,
+                                       int xmap_offset, int ymap_offset,
+                                       float scale)
 {
     int du = get_global_id(0);
     int dv = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (du < cols && dv < rows)
     {
+        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
+        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
+
         float u = tl_u + du;
         float v = tl_v + dv;
         float x, y;
@@ -127,33 +115,27 @@ __kernel
         if (z > 0) { x /= z; y /= z; }
         else x = y = -1;
 
-        map_x[dv * step_x + du] = x;
-        map_y[dv * step_y + du] = y;
+        xmap[xmap_index] = x;
+        ymap[ymap_index] = y;
     }
 }
 
-__kernel
-    void buildWarpSphericalMaps
-    (
-    __global float * map_x,
-    __global float * map_y,
-    __constant float * ck_rinv,
-    int tl_u,
-    int tl_v,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y,
-    float scale
-    )
+__kernel void buildWarpSphericalMaps(__global float * xmap, __global float * ymap,
+                                     __constant float * ck_rinv,
+                                     int tl_u, int tl_v,
+                                     int cols, int rows,
+                                     int xmap_step, int ymap_step,
+                                     int xmap_offset, int ymap_offset,
+                                     float scale)
 {
     int du = get_global_id(0);
     int dv = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (du < cols && dv < rows)
     {
+        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
+        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
+
         float u = tl_u + du;
         float v = tl_v + dv;
         float x, y;
@@ -174,63 +156,52 @@ __kernel
         if (z > 0) { x /= z; y /= z; }
         else x = y = -1;
 
-        map_x[dv * step_x + du] = x;
-        map_y[dv * step_y + du] = y;
+        xmap[xmap_index] = x;
+        ymap[ymap_index] = y;
     }
 }
 
-__kernel
-    void buildWarpAffineMaps
-    (
-    __global float * xmap,
-    __global float * ymap,
-    __constant float * c_warpMat,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y
-    )
+__kernel void buildWarpAffineMaps(__global float * xmap, __global float * ymap,
+                                  __constant float * c_warpMat,
+                                  int cols, int rows,
+                                  int xmap_step, int ymap_step,
+                                  int xmap_offset, int ymap_offset)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (x < cols && y < rows)
     {
-        const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
-        const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
+        int xmap_index = mad24(y, xmap_step, x + xmap_offset);
+        int ymap_index = mad24(y, ymap_step, x + ymap_offset);
 
-        map_x[y * step_x + x] = xcoo;
-        map_y[y * step_y + x] = ycoo;
+        float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
+        float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
+
+        xmap[xmap_index] = xcoo;
+        ymap[ymap_index] = ycoo;
     }
 }
 
-__kernel
-    void buildWarpPerspectiveMaps
-    (
-    __global float * xmap,
-    __global float * ymap,
-    __constant float * c_warpMat,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y
-    )
+__kernel void buildWarpPerspectiveMaps(__global float * xmap, __global float * ymap,
+                                       __constant float * c_warpMat,
+                                       int cols, int rows,
+                                       int xmap_step, int ymap_step,
+                                       int xmap_offset, int ymap_offset)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (x < cols && y < rows)
     {
-        const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
+        int xmap_index = mad24(y, xmap_step, x + xmap_offset);
+        int ymap_index = mad24(y, ymap_step, x + ymap_offset);
 
-        const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
-        const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
+        float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
+        float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
+        float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
 
-        map_x[y * step_x + x] = xcoo;
-        map_y[y * step_y + x] = ycoo;
+        xmap[xmap_index] = xcoo;
+        ymap[ymap_index] = ycoo;
     }
 }

From 8b57893e406c5df6dbbb29a2c805005b47db9428 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 18:42:13 +0400
Subject: [PATCH 2/3] added an accuracy test for ocl::buildWarpPerspectiveMaps

---
 modules/ocl/test/test_warp.cpp | 110 +++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index b9231d116..3da73dc23 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -156,6 +156,114 @@ OCL_TEST_P(WarpPerspective, Mat)
     }
 }
 
+// buildWarpPerspectiveMaps
+
+PARAM_TEST_CASE(BuildWarpPerspectiveMaps, bool, bool)
+{
+    bool useRoi, mapInverse;
+    Size dsize;
+
+    Mat xmap_whole, ymap_whole, xmap_roi, ymap_roi;
+    ocl::oclMat gxmap_whole, gymap_whole, gxmap_roi, gymap_roi;
+
+    void SetUp()
+    {
+        mapInverse = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
+    }
+
+    void random_roi()
+    {
+        dsize = randomSize(1, MAX_VALUE);
+
+        Border xmapBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+        randomSubMat(xmap_whole, xmap_roi, dsize, xmapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
+
+        Border ymapBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+        randomSubMat(ymap_whole, ymap_roi, dsize, ymapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
+
+        generateOclMat(gxmap_whole, gxmap_roi, xmap_whole, dsize, xmapBorder);
+        generateOclMat(gymap_whole, gymap_roi, ymap_whole, dsize, ymapBorder);
+    }
+
+    void Near(double threshold = 0.0)
+    {
+        Mat whole, roi;
+        gxmap_whole.download(whole);
+        gxmap_roi.download(roi);
+
+        EXPECT_MAT_NEAR(xmap_whole, whole, threshold);
+        EXPECT_MAT_NEAR(xmap_roi, roi, threshold);
+    }
+
+    void Near1(double threshold = 0.0)
+    {
+        Mat whole, roi;
+        gymap_whole.download(whole);
+        gymap_roi.download(roi);
+
+        EXPECT_MAT_NEAR(ymap_whole, whole, threshold);
+        EXPECT_MAT_NEAR(ymap_roi, roi, threshold);
+    }
+};
+
+static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
+{
+    CV_Assert(M.rows == 3 && M.cols == 3);
+    CV_Assert(dsize.area() > 0);
+
+    xmap.create(dsize, CV_32FC1);
+    ymap.create(dsize, CV_32FC1);
+
+    float coeffs[3 * 3];
+    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
+
+    if (inverse)
+        M.convertTo(coeffsMat, coeffsMat.type());
+    else
+    {
+        cv::Mat iM;
+        invert(M, iM);
+        iM.convertTo(coeffsMat, coeffsMat.type());
+    }
+
+    for (int y = 0; y < dsize.height; ++y)
+    {
+        float * const xmap_ptr = xmap.ptr<float>(y);
+        float * const ymap_ptr = ymap.ptr<float>(y);
+
+        for (int x = 0; x < dsize.width; ++x)
+        {
+            float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
+            xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
+            ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
+        }
+    }
+}
+
+OCL_TEST_P(BuildWarpPerspectiveMaps, Mat)
+{
+    for (int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+
+        float cols = static_cast<float>(MAX_VALUE), rows = static_cast<float>(MAX_VALUE);
+        float cols2 = cols / 2.0f, rows2 = rows / 2.0f;
+        Point2f sp[] = { Point2f(0.0f, 0.0f), Point2f(cols, 0.0f), Point2f(0.0f, rows), Point2f(cols, rows) };
+        Point2f dp[] = { Point2f(rng.uniform(0.0f, cols2), rng.uniform(0.0f, rows2)),
+            Point2f(rng.uniform(cols2, cols), rng.uniform(0.0f, rows2)),
+            Point2f(rng.uniform(0.0f, cols2), rng.uniform(rows2, rows)),
+            Point2f(rng.uniform(cols2, cols), rng.uniform(rows2, rows)) };
+        Mat M = getPerspectiveTransform(sp, dp);
+
+        buildWarpPerspectiveMaps(M, mapInverse, dsize, xmap_roi, ymap_roi);
+        ocl::buildWarpPerspectiveMaps(M, mapInverse, dsize, gxmap_roi, gymap_roi);
+
+        Near(1e-6);
+        Near1(1e-6);
+    }
+}
+
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // remap
 
@@ -338,6 +446,8 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
                             Bool(),
                             Bool()));
 
+INSTANTIATE_TEST_CASE_P(ImgprocWarp, BuildWarpPerspectiveMaps, Combine(Bool(), Bool()));
+
 INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
                             Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                             Values(1, 2, 3, 4),

From 3727168b50cc1693366f9cd517f70f5b46dc593e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 18:43:15 +0400
Subject: [PATCH 3/3] added a performance test for
 ocl::buildWarpPerspectiveMaps; moved warps to a separate file

---
 modules/ocl/perf/perf_imgproc.cpp | 194 ------------------
 modules/ocl/perf/perf_imgwarp.cpp | 320 ++++++++++++++++++++++++++++++
 modules/ocl/test/test_warp.cpp    |   4 +-
 3 files changed, 322 insertions(+), 196 deletions(-)
 create mode 100644 modules/ocl/perf/perf_imgwarp.cpp

diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp
index c57950ff1..b43458c6a 100644
--- a/modules/ocl/perf/perf_imgproc.cpp
+++ b/modules/ocl/perf/perf_imgproc.cpp
@@ -231,139 +231,6 @@ PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
         OCL_PERF_ELSE
 }
 
-///////////// WarpAffine ////////////////////////
-
-typedef Size_MatType WarpAffineFixture;
-
-PERF_TEST_P(WarpAffineFixture, WarpAffine,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    static const double coeffs[2][3] =
-    {
-        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
-        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
-    };
-    Mat M(2, 3, CV_64F, (void *)coeffs);
-    const int interpolation = INTER_NEAREST;
-
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// WarpPerspective ////////////////////////
-
-typedef Size_MatType WarpPerspectiveFixture;
-
-PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    static const double coeffs[3][3] =
-    {
-        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
-        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
-        {0.0, 0.0, 1.0}
-    };
-    Mat M(3, 3, CV_64F, (void *)coeffs);
-    const int interpolation = INTER_LINEAR;
-
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst)
-            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// resize ////////////////////////
-
-CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
-
-typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
-typedef TestBaseWithParam<resizeParams> resizeFixture;
-
-PERF_TEST_P(resizeFixture, resize,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               resizeInterType::all(),
-                               ::testing::Values(0.5, 2.0)))
-{
-    const resizeParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), interType = get<2>(params);
-    double scale = get<3>(params);
-
-    Mat src(srcSize, type), dst;
-    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-    if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
-        declare.time(11);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
 ///////////// threshold////////////////////////
 
 CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
@@ -727,67 +594,6 @@ PERF_TEST_P(meanShiftProcFixture, meanShiftProc,
         OCL_PERF_ELSE
 }
 
-///////////// remap////////////////////////
-
-CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
-
-typedef tuple<Size, MatType, RemapInterType> remapParams;
-typedef TestBaseWithParam<remapParams> remapFixture;
-
-PERF_TEST_P(remapFixture, remap,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               RemapInterType::all()))
-{
-    const remapParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), interpolation = get<2>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
-        declare.time(9);
-
-    Mat xmap, ymap;
-    xmap.create(srcSize, CV_32FC1);
-    ymap.create(srcSize, CV_32FC1);
-
-    for (int i = 0; i < srcSize.height; ++i)
-    {
-        float * const xmap_row = xmap.ptr<float>(i);
-        float * const ymap_row = ymap.ptr<float>(i);
-
-        for (int j = 0; j < srcSize.width; ++j)
-        {
-            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
-            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
-        }
-    }
-
-    const int borderMode = BORDER_CONSTANT;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-        ocl::oclMat oclXMap(xmap), oclYMap(ymap);
-
-        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
 ///////////// CLAHE ////////////////////////
 
 typedef TestBaseWithParam<Size> CLAHEFixture;
diff --git a/modules/ocl/perf/perf_imgwarp.cpp b/modules/ocl/perf/perf_imgwarp.cpp
new file mode 100644
index 000000000..0aff45e9a
--- /dev/null
+++ b/modules/ocl/perf/perf_imgwarp.cpp
@@ -0,0 +1,320 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
+
+///////////// WarpAffine ////////////////////////
+
+typedef Size_MatType WarpAffineFixture;
+
+PERF_TEST_P(WarpAffineFixture, WarpAffine,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
+{
+    static const double coeffs[2][3] =
+    {
+        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
+        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
+    };
+    Mat M(2, 3, CV_64F, (void *)coeffs);
+    const int interpolation = INTER_NEAREST;
+
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// WarpPerspective ////////////////////////
+
+typedef Size_MatType WarpPerspectiveFixture;
+
+PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
+{
+    static const double coeffs[3][3] =
+    {
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
+        {0.0, 0.0, 1.0}
+    };
+    Mat M(3, 3, CV_64F, (void *)coeffs);
+    const int interpolation = INTER_LINEAR;
+
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst)
+            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// resize ////////////////////////
+
+CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
+
+typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
+typedef TestBaseWithParam<resizeParams> resizeFixture;
+
+PERF_TEST_P(resizeFixture, resize,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
+                               resizeInterType::all(),
+                               ::testing::Values(0.5, 2.0)))
+{
+    const resizeParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), interType = get<2>(params);
+    double scale = get<3>(params);
+
+    Mat src(srcSize, type), dst;
+    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+    if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
+        declare.time(11);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// remap////////////////////////
+
+CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
+
+typedef tuple<Size, MatType, RemapInterType> remapParams;
+typedef TestBaseWithParam<remapParams> remapFixture;
+
+PERF_TEST_P(remapFixture, remap,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
+                               RemapInterType::all()))
+{
+    const remapParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), interpolation = get<2>(params);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
+        declare.time(9);
+
+    Mat xmap, ymap;
+    xmap.create(srcSize, CV_32FC1);
+    ymap.create(srcSize, CV_32FC1);
+
+    for (int i = 0; i < srcSize.height; ++i)
+    {
+        float * const xmap_row = xmap.ptr<float>(i);
+        float * const ymap_row = ymap.ptr<float>(i);
+
+        for (int j = 0; j < srcSize.width; ++j)
+        {
+            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
+            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
+        }
+    }
+
+    const int borderMode = BORDER_CONSTANT;
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+        ocl::oclMat oclXMap(xmap), oclYMap(ymap);
+
+        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+
+///////////// buildWarpPerspectiveMaps ////////////////////////
+
+static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
+{
+    CV_Assert(M.rows == 3 && M.cols == 3);
+    CV_Assert(dsize.area() > 0);
+
+    xmap.create(dsize, CV_32FC1);
+    ymap.create(dsize, CV_32FC1);
+
+    float coeffs[3 * 3];
+    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
+
+    if (inverse)
+        M.convertTo(coeffsMat, coeffsMat.type());
+    else
+    {
+        cv::Mat iM;
+        invert(M, iM);
+        iM.convertTo(coeffsMat, coeffsMat.type());
+    }
+
+    for (int y = 0; y < dsize.height; ++y)
+    {
+        float * const xmap_ptr = xmap.ptr<float>(y);
+        float * const ymap_ptr = ymap.ptr<float>(y);
+
+        for (int x = 0; x < dsize.width; ++x)
+        {
+            float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
+            xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
+            ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
+        }
+    }
+}
+
+typedef TestBaseWithParam<Size> buildWarpPerspectiveMapsFixture;
+
+PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
+{
+    static const double coeffs[3][3] =
+    {
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
+        {0.0, 0.0, 1.0}
+    };
+    Mat M(3, 3, CV_64F, (void *)coeffs);
+    const Size dsize = GetParam();
+
+    Mat xmap(dsize, CV_32FC1), ymap(dsize, CV_32FC1);
+    declare.in(M).out(xmap, ymap);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclXMap(dsize, CV_32FC1), oclYMap(dsize, CV_32FC1);
+
+        OCL_TEST_CYCLE() cv::ocl::buildWarpPerspectiveMaps(M, true, dsize, oclXMap, oclYMap);
+
+        oclXMap.download(xmap);
+        oclYMap.download(ymap);
+
+        SANITY_CHECK(xmap);
+        SANITY_CHECK(ymap);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() buildWarpPerspectiveMaps(M, true, dsize, xmap, ymap);
+
+        SANITY_CHECK(xmap);
+        SANITY_CHECK(ymap);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index 3da73dc23..42415d099 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -259,8 +259,8 @@ OCL_TEST_P(BuildWarpPerspectiveMaps, Mat)
         buildWarpPerspectiveMaps(M, mapInverse, dsize, xmap_roi, ymap_roi);
         ocl::buildWarpPerspectiveMaps(M, mapInverse, dsize, gxmap_roi, gymap_roi);
 
-        Near(1e-6);
-        Near1(1e-6);
+        Near(5e-3);
+        Near1(5e-3);
     }
 }