Merge branch '2.4'

2013-02-22 17:33:30 +04:00
parent 0ccdc5b4af e6dd4e840d
commit 0b8a6da817
47 changed files with 669 additions and 376 deletions
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -102,7 +102,7 @@ namespace cv

        //this function enable ocl module to use customized cl_context and cl_command_queue
        //getDevice also need to be called before this function
-        CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); 
+        CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);

        //////////////////////////////// Error handling ////////////////////////
        CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
@@ -125,6 +125,24 @@ namespace cv
            Impl *impl;
        };

+        //! Runs the named kernel taken from in-memory source strings. Pass globalThreads = NULL and cleanUp = true to release the kernel without executing it.
+        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+                                                        const char **source, string kernelName,
+                                                        size_t globalThreads[3], size_t localThreads[3],
+                                                        std::vector< std::pair<size_t, const void *> > &args,
+                                                        int channels, int depth, const char *build_options,
+                                                        bool finish = true, bool measureKernelTime = false,
+                                                        bool cleanUp = true);
+
+        //! Runs the named kernel taken from source files (numFiles paths in fileName). Pass globalThreads = NULL and cleanUp = true to release the kernel without executing it.
+        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
+                                                        const char **fileName, const int numFiles, string kernelName,
+                                                        size_t globalThreads[3], size_t localThreads[3],
+                                                        std::vector< std::pair<size_t, const void *> > &args,
+                                                        int channels, int depth, const char *build_options,
+                                                        bool finish = true, bool measureKernelTime = false,
+                                                        bool cleanUp = true);
+
        class CV_EXPORTS oclMatExpr;
        //////////////////////////////// oclMat ////////////////////////////////
        class CV_EXPORTS oclMat
@@ -469,21 +487,22 @@ namespace cv
        CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());

        //! Logical operators
-        CV_EXPORTS oclMatExpr operator ~ (const oclMat &src);
-        CV_EXPORTS oclMatExpr operator | (const oclMat &src1, const oclMat &src2);
-        CV_EXPORTS oclMatExpr operator & (const oclMat &src1, const oclMat &src2);
-        CV_EXPORTS oclMatExpr operator ^ (const oclMat &src1, const oclMat &src2);
+        CV_EXPORTS oclMat operator ~ (const oclMat &);
+        CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &);
+        CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &);
+        CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &);
+

        //! Mathematics operators
        CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2);
        CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2);
        CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
        CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
- 
+
        //! computes convolution of two images
        //! support only CV_32FC1 type
        CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
- 
+
        CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);

        //////////////////////////////// Filter Engine ////////////////////////////////
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@@ -109,5 +109,5 @@ TEST_P(Gemm, Performance)
 INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
-                            testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+                            testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
 #endif
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -2125,22 +2125,22 @@ void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, c
        bitwise_scalar( src1, src2, dst, mask, kernelName, &arithm_bitwise_xor_scalar);
 }

-oclMatExpr cv::ocl::operator ~ (const oclMat &src)
+oclMat cv::ocl::operator ~ (const oclMat &src)
 {
    return oclMatExpr(src, oclMat(), MAT_NOT);
 }

-oclMatExpr cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
+oclMat cv::ocl::operator | (const oclMat &src1, const oclMat &src2)
 {
    return oclMatExpr(src1, src2, MAT_OR);
 }

-oclMatExpr cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
+oclMat cv::ocl::operator & (const oclMat &src1, const oclMat &src2)
 {
    return oclMatExpr(src1, src2, MAT_AND);
 }

-oclMatExpr cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
+oclMat cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2)
 {
    return oclMatExpr(src1, src2, MAT_XOR);
 }
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -12,6 +12,7 @@
 //
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
@@ -23,6 +24,7 @@
 //    Zhang Ying, zhangying913@gmail.com
 //    Xu Pang, pangxu010@163.com
 //    Wu Zailong, bullet@yeah.net
+//    Wenju He, wenju@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -1524,7 +1526,7 @@ namespace cv
            mat_dst.create(mat_src.rows, mat_src.cols, CV_8UC1);

            oclMat mat_hist(1, 256, CV_32SC1);
-            //mat_hist.setTo(0);
+
            calcHist(mat_src, mat_hist);

            Context *clCxt = mat_src.clCxt;
@@ -1533,10 +1535,10 @@ namespace cv
            size_t globalThreads[3] = { 256, 1, 1};
            oclMat lut(1, 256, CV_8UC1);
            vector<pair<size_t , const void *> > args;
-            float scale = 255.f / (mat_src.rows * mat_src.cols);
+            int total = mat_src.rows * mat_src.cols;
            args.push_back( make_pair( sizeof(cl_mem), (void *)&lut.data));
            args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_hist.data));
-            args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
+            args.push_back( make_pair( sizeof(int), (void *)&total));
            openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1);
            LUT(mat_src, lut, mat_dst);
        }
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -47,6 +47,7 @@

 #include "precomp.hpp"
 #include <iomanip>
+#include <fstream>
 #include "binarycaching.hpp"

 using namespace cv;
@@ -730,7 +731,138 @@ namespace cv
 #endif
        }

-        cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
+        double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName,
+                                 size_t globalThreads[3], size_t localThreads[3],
+                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+                                 bool finish, bool measureKernelTime, bool cleanUp)
+
+        {
+            // Obtains the kernel kernelName[_C<channels>][_D<depth>] for `source`, optionally launches it and
+            // returns the measured time in milliseconds (0.0 unless measureKernelTime is set and a launch happened).
+            // The naming rule is functionName_Cn_Dn: C = channels, D = data type depth, n = an integer, for
+            // example split_C2_D2 is the split kernel with channels = 2 and depth = 2; -1 omits that suffix.
+            stringstream idxStr;
+            if(channels != -1)
+                idxStr << "_C" << channels;
+            if(depth != -1)
+                idxStr << "_D" << depth;
+            kernelName += idxStr.str();
+
+            cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
+            double kernelTime = 0.0;
+
+            // globalThreads == NULL means "do not launch"; finish and cleanUp below still apply.
+            if( globalThreads != NULL)
+            {
+                if ( localThreads != NULL)
+                {
+                    // Round every global dimension up to a whole number of work-groups.
+                    for(int d = 0; d < 3; d ++)
+                        globalThreads[d] = divUp(globalThreads[d], localThreads[d]) * localThreads[d];
+                    cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
+                }
+                for(size_t i = 0; i < args.size(); i ++)
+                    openCLSafeCall(clSetKernelArg(kernel, (cl_uint)i, args[i].first, args[i].second));
+
+                if(measureKernelTime == false)
+                {
+                    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+                                    localThreads, 0, NULL, NULL));
+                }
+                else
+                {
+                    cl_event event = NULL;
+                    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+                                    localThreads, 0, NULL, &event));
+
+                    // Gather the status codes first and hand them to openCLSafeCall only after
+                    // clReleaseEvent, so a failed wait or query cannot leak the event.
+                    cl_ulong end_time = 0, queue_time = 0;
+                    cl_int status = clWaitForEvents(1, &event);
+                    if(status == CL_SUCCESS)
+                        status = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
+                                    sizeof(cl_ulong), &end_time, 0);
+                    if(status == CL_SUCCESS)
+                        status = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
+                                    sizeof(cl_ulong), &queue_time, 0);
+                    clReleaseEvent(event);
+                    openCLSafeCall(status);
+
+                    // Profiling counters are in nanoseconds: report queued-to-end time in milliseconds.
+                    kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
+                }
+            }
+
+            if(finish)
+            {
+                clFinish(clCxt->impl->clCmdQueue);
+            }
+
+            if(cleanUp)
+            {
+                openCLSafeCall(clReleaseKernel(kernel));
+            }
+
+            return kernelTime;
+        }
+
+        // Reads the whole file `filename` into `s`. Returns 0 on success and -1 when the file
+        // cannot be opened, its size cannot be determined, or fewer bytes than expected are read.
+        static int convertToString(const char *filename, std::string& s)
+        {
+            std::fstream f(filename, (std::fstream::in | std::fstream::binary));
+            if(!f.is_open())
+            {
+                printf("Error: Failed to open file %s\n", filename);
+                return -1;
+            }
+
+            // tellg() yields -1 on failure; cast to size_t unchecked, that would ask for a huge read.
+            f.seekg(0, std::fstream::end);
+            const std::streamoff fileSize = f.tellg();
+            f.seekg(0, std::fstream::beg);
+            if(!f || fileSize < 0)
+            {
+                printf("Error: Failed to read file %s\n", filename);
+                return -1;
+            }
+
+            // The vector owns the buffer (one extra byte keeps it NUL-terminated), so nothing leaks on error.
+            std::vector<char> buffer((size_t)fileSize + 1, '\0');
+            f.read(&buffer[0], (std::streamsize)fileSize);
+            if(f.gcount() != (std::streamsize)fileSize)
+            {
+                printf("Error: Failed to read file %s\n", filename);
+                return -1;
+            }
+            s = &buffer[0];   // as before, the text up to the first NUL byte
+            return 0;
+        }
+
+        double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName,
+                                 size_t globalThreads[3], size_t localThreads[3],
+                                 vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
+                                 bool finish, bool measureKernelTime, bool cleanUp)
+
+        {
+            // File-based variant: loads the numFiles files named in fileName and forwards their text to the
+            // string-based overload. fsource owns the text; source only holds pointers into it.
+            std::vector<std::string> fsource((size_t)(numFiles > 0 ? numFiles : 0));
+            std::vector<const char *> source(fsource.size());
+            for (size_t i = 0 ; i < fsource.size() ; i++)
+            {
+                // Every source[i] must point at loaded text, so a file that fails to load is an error
+                // rather than a silently shorter list.
+                if (convertToString(fileName[i], fsource[i]) < 0)
+                    CV_Error(CV_StsBadArg, std::string("Failed to load OpenCL source file ") + fileName[i]);
+                source[i] = fsource[i].c_str();
+            }
+            // Both vectors free themselves, including when the nested call reports an error by throwing.
+            return openCLExecuteKernelInterop(clCxt, source.empty() ? (const char **)NULL : &source[0], kernelName, globalThreads,
+                                 localThreads, args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
+        }
+
+       cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                             const size_t size)
        {
            int status;
--- a/modules/ocl/src/kernels/imgproc_histogram.cl
+++ b/modules/ocl/src/kernels/imgproc_histogram.cl
@@ -3,12 +3,14 @@
 //
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
 //    Niko Li, newlife20080214@gmail.com
 //    Jia Haipeng, jiahaipeng95@gmail.com
 //    Xu Pang, pangxu010@163.com
+//    Wenju He, wenju@multicorewareinc.com
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@@ -189,24 +191,27 @@ __kernel __attribute__((reqd_work_group_size(256,1,1)))void merge_hist(__global
 __kernel __attribute__((reqd_work_group_size(256,1,1)))void calLUT(
                            __global uchar * dst,
                            __constant int * hist,
-                            float scale)
+                            int total)
 {
    int lid = get_local_id(0);
-    __local int sumhist[HISTOGRAM256_BIN_COUNT];
-    //__local uchar lut[HISTOGRAM256_BIN_COUNT+1];
+    __local int sumhist[HISTOGRAM256_BIN_COUNT+1];

    sumhist[lid]=hist[lid];
    barrier(CLK_LOCAL_MEM_FENCE);
    if(lid==0)
    {
        int sum = 0;
-        for(int i=0;i<HISTOGRAM256_BIN_COUNT;i++)
+        int i = 0;
+        while (i < HISTOGRAM256_BIN_COUNT - 1 && !sumhist[i]) ++i; // find the first non-empty bin; bounded so an all-zero histogram cannot run past the array
+        sumhist[HISTOGRAM256_BIN_COUNT] = sumhist[i];
+        for(sumhist[i++] = 0; i<HISTOGRAM256_BIN_COUNT; i++)
        {
            sum+=sumhist[i];
            sumhist[i]=sum;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
+    float scale = total > sumhist[HISTOGRAM256_BIN_COUNT] ? 255.f/(total - sumhist[HISTOGRAM256_BIN_COUNT]) : 0.f; // sumhist[BIN_COUNT] holds the first non-empty bin's count; avoid dividing by zero when it equals total
    dst[lid]= lid == 0 ? 0 : convert_uchar_sat(convert_float(sumhist[lid])*scale);
 }
 /*
--- a/modules/ocl/src/kernels/meanShift.cl
+++ b/modules/ocl/src/kernels/meanShift.cl
@@ -12,11 +12,13 @@
 //
 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
 //    Shengen Yan,yanshengen@gmail.com
 //    Xu Pang, pangxu010@163.com
+//    Wenju He, wenju@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -43,12 +45,6 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-typedef double F;
-#else
-typedef float F;
-#endif

 short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
               __global uchar4* in, int in_step, int dst_off, int src_off,
@@ -184,12 +180,11 @@ short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
        if( count == 0 )
            break;

-        F  icount = 1.0/count;
-        int x1 = convert_int_rtz(sx*icount);
-        int y1 = convert_int_rtz(sy*icount);
-        s.x = convert_int_rtz(s.x*icount);
-        s.y = convert_int_rtz(s.y*icount);
-        s.z = convert_int_rtz(s.z*icount);
+        int x1 = sx/count;
+        int y1 = sy/count;
+        s.x = s.x/count;
+        s.y = s.y/count;
+        s.z = s.z/count;

        int4 tmp = s - convert_int4(c);
        int norm2 = tmp.x * tmp.x + tmp.y *  tmp.y +
--- a/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@ -46,6 +46,9 @@
 #include "mcwutil.hpp"

 #if defined (HAVE_OPENCL)
+#ifndef CL_VERSION_1_2
+#define CL_VERSION_1_2 0
+#endif

 using namespace std;

@@ -123,6 +126,101 @@ namespace cv
            openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
                                  build_options, finish_mode);
        }
+
+        cl_mem bindTexture(const oclMat &mat)
+        {
+            // Creates a mat.cols x mat.rows 2D image and copies the matrix into it. Only 8U/32S/32F depth with
+            // 1, 3 or 4 channels is handled (anything else throws std::exception); OpenCL errors go to openCLSafeCall.
+            cl_mem texture;
+            cl_image_format format;
+            cl_int err = CL_SUCCESS;
+            switch(mat.depth())
+            {
+            case CV_8U:
+                format.image_channel_data_type = CL_UNSIGNED_INT8;
+                break;
+            case CV_32S:
+                format.image_channel_data_type = CL_UNSIGNED_INT32; // NOTE(review): CV_32S is signed - confirm CL_SIGNED_INT32 is not intended
+                break;
+            case CV_32F:
+                format.image_channel_data_type = CL_FLOAT;
+                break;
+            default:
+                throw std::exception();
+            }
+            switch(mat.channels())
+            {
+            case 1:
+                format.image_channel_order     = CL_R;
+                break;
+            case 3:
+                format.image_channel_order     = CL_RGB;
+                break;
+            case 4:
+                format.image_channel_order     = CL_RGBA;
+                break;
+            default:
+                throw std::exception();
+            }
+#if CL_VERSION_1_2
+            cl_image_desc desc;
+            desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
+            desc.image_width      = mat.cols;
+            desc.image_height     = mat.rows;
+            desc.image_depth      = 0;
+            desc.image_array_size = 1;
+            desc.image_row_pitch  = 0;
+            desc.image_slice_pitch = 0;
+            desc.buffer           = NULL;
+            desc.num_mip_levels   = 0;
+            desc.num_samples      = 0;
+            texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+#else
+            texture = clCreateImage2D(
+                mat.clCxt->impl->clContext,
+                CL_MEM_READ_WRITE,
+                &format,
+                mat.cols,
+                mat.rows,
+                0,
+                NULL,
+                &err);
+#endif
+            // Stop here if the image could not be created, before any copy into it is enqueued.
+            openCLSafeCall(err);
+
+            const size_t rowBytes = mat.cols * mat.elemSize();
+            const bool padded = rowBytes != mat.step;
+            size_t origin[] = { 0, 0, 0 };
+            size_t region[] = { (size_t)mat.cols, (size_t)mat.rows, 1 };
+
+            cl_mem devData = (cl_mem)mat.data;
+            if (padded)
+            {
+                // Rows are padded: pack them into a temporary tightly-pitched buffer first.
+                const size_t regin[3] = { rowBytes, (size_t)mat.rows, 1 };
+                devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, rowBytes * mat.rows, NULL, &err);
+                if (err == CL_SUCCESS)
+                    err = clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
+                        regin, mat.step, 0, rowBytes, 0, 0, NULL, NULL);
+            }
+            if (err == CL_SUCCESS)
+                err = clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
+            if (padded && devData != NULL)
+            {
+                clFinish(mat.clCxt->impl->clCmdQueue);
+                clReleaseMemObject(devData);
+            }
+            if (err != CL_SUCCESS)
+                clReleaseMemObject(texture); // do not leak the image when the upload failed
+            openCLSafeCall(err);
+            return texture;
+        }
+
+        void releaseTexture(cl_mem& texture)
+        {
+            openCLFree(texture);
+        }
    }//namespace ocl

 }//namespace cv
--- a/modules/ocl/src/mcwutil.hpp
+++ b/modules/ocl/src/mcwutil.hpp
@@ -67,6 +67,12 @@ namespace cv
        void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
                                  size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
                                  int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
+        // bind oclMat to OpenCL image textures
+        // note:
+        //   1. there is no memory management; the caller must explicitly release the texture (see releaseTexture)
+        //   2. for faster clamping, there is no buffer padding for the constructed texture
+        cl_mem bindTexture(const oclMat &mat);
+        void releaseTexture(cl_mem& texture);
    }//namespace ocl

 }//namespace cv
--- a/modules/ocl/src/pyrlk.cpp
+++ b/modules/ocl/src/pyrlk.cpp
@@ -10,10 +10,15 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
+// @Authors
+//		Dachuan Zhao, dachuan@multicorewareinc.com
+//		Yao Wang, yao@multicorewareinc.com
+//      Nathan, liujun@multicorewareinc.com
+//
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
@@ -22,13 +27,13 @@
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
+//     and/or other oclMaterials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
-// any express or bpied warranties, including, but not limited to, the bpied
+// any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
@@ -40,6 +45,7 @@
 //
 //M*/

+
 #include "precomp.hpp"
 #include "mcwutil.hpp"
 using namespace std;
@@ -568,197 +574,16 @@ static void pyrDown_cus(const oclMat &src, oclMat &dst)
    pyrdown_run_cus(src, dst);
 }

-
-//struct MultiplyScalar
-//{
-//    MultiplyScalar(double val_, double scale_) : val(val_), scale(scale_) {}
-//    double operator ()(double a) const
-//    {
-//        return (scale * a * val);
-//    }
-//    const double val;
-//    const double scale;
-//};
-//
-//void callF(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
-//{
-//	Mat srcTemp;
-//	Mat dstTemp;
-//	src.download(srcTemp);
-//	dst.download(dstTemp);
-//
-//	int i;
-//	int j;
-//	int k;
-//	for(i = 0; i < srcTemp.rows; i++)
-//	{
-//		for(j = 0; j < srcTemp.cols; j++)
-//		{
-//			for(k = 0; k < srcTemp.channels(); k++)
-//			{
-//				((float*)dstTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k] = (float)op(((float*)srcTemp.data)[srcTemp.channels() * (i * srcTemp.rows + j) + k]);
-//			}
-//		}
-//	}
-//
-//	dst = dstTemp;
-//}
-//
-//static inline bool isAligned(const unsigned char* ptr, size_t size)
-//{
-//    return reinterpret_cast<size_t>(ptr) % size == 0;
-//}
-//
-//static inline bool isAligned(size_t step, size_t size)
-//{
-//    return step % size == 0;
-//}
-//
-//void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
-//{
-//    if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
-//        !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
-//    {
-//        callF(src, dst, op, mask);
-//        return;
-//    }
-//
-//	Mat srcTemp;
-//	Mat dstTemp;
-//	src.download(srcTemp);
-//	dst.download(dstTemp);
-//
-//	int x_shifted;
-//
-//	int i;
-//	int j;
-//	for(i = 0; i < srcTemp.rows; i++)
-//	{
-//		const double* srcRow = (const double*)srcTemp.data + i * srcTemp.rows;
-//        double* dstRow = (double*)dstTemp.data + i * dstTemp.rows;;
-//
-//		for(j = 0; j < srcTemp.cols; j++)
-//		{
-//			x_shifted = j * 4;
-//
-//			if(x_shifted + 4 - 1 < srcTemp.cols)
-//			{
-//				dstRow[x_shifted    ] = op(srcRow[x_shifted    ]);
-//				dstRow[x_shifted + 1] = op(srcRow[x_shifted + 1]);
-//				dstRow[x_shifted + 2] = op(srcRow[x_shifted + 2]);
-//				dstRow[x_shifted + 3] = op(srcRow[x_shifted + 3]);
-//			}
-//			else
-//			{
-//				for (int real_x = x_shifted; real_x < srcTemp.cols; ++real_x)
-//				{
-//					((float*)dstTemp.data)[i * srcTemp.rows + real_x] = op(((float*)srcTemp.data)[i * srcTemp.rows + real_x]);
-//				}
-//			}
-//		}
-//	}
-//}
-//
-//void multiply(const oclMat& src1, double val, oclMat& dst, double scale = 1.0f);
-//void multiply(const oclMat& src1, double val, oclMat& dst, double scale)
-//{
-//    MultiplyScalar op(val, scale);
-//	//if(src1.channels() == 1 && dst.channels() == 1)
-//	//{
-//	//    callT(src1, dst, op, 0);
-//	//}
-//	//else
-//	//{
-//	    callF(src1, dst, op, 0);
-//	//}
-//}
-
-static cl_mem bindTexture(const oclMat &mat, int depth, int channels)
-{
-    cl_mem texture;
-    cl_image_format format;
-    int err;
-    if(depth == 0)
-    {
-        format.image_channel_data_type = CL_UNSIGNED_INT8;
-    }
-    else if(depth == 5)
-    {
-        format.image_channel_data_type = CL_FLOAT;
-    }
-    if(channels == 1)
-    {
-        format.image_channel_order     = CL_R;
-    }
-    else if(channels == 3)
-    {
-        format.image_channel_order     = CL_RGB;
-    }
-    else if(channels == 4)
-    {
-        format.image_channel_order     = CL_RGBA;
-    }
-#ifdef CL_VERSION_1_2
-    cl_image_desc desc;
-    desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
-    desc.image_width      = mat.step / mat.elemSize();
-    desc.image_height     = mat.rows;
-    desc.image_depth      = 0;
-    desc.image_array_size = 1;
-    desc.image_row_pitch  = 0;
-    desc.image_slice_pitch = 0;
-    desc.buffer           = NULL;
-    desc.num_mip_levels   = 0;
-    desc.num_samples      = 0;
-    texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
-#else
-    texture = clCreateImage2D(
-                  mat.clCxt->impl->clContext,
-                  CL_MEM_READ_WRITE,
-                  &format,
-                  mat.step / mat.elemSize(),
-                  mat.rows,
-                  0,
-                  NULL,
-                  &err);
-#endif
-    size_t origin[] = { 0, 0, 0 };
-    size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
-    clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
-    openCLSafeCall(err);
-
-    return texture;
-}
-
-static void releaseTexture(cl_mem texture)
-{
-    openCLFree(texture);
-}
-
 static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
 {
    Context  *clCxt = I.clCxt;
-    char platform[256] = {0};
-    cl_platform_id pid;
-    clGetDeviceInfo(clCxt->impl->devices, CL_DEVICE_PLATFORM, sizeof(pid), &pid, NULL);
-    clGetPlatformInfo(pid, CL_PLATFORM_NAME, 256, platform, NULL);
-    std::string namestr = platform;
-    bool isImageSupported = true;
-    if(namestr.find("NVIDIA")!=string::npos || namestr.find("Intel")!=string::npos)
-        isImageSupported = false;
-
    int elemCntPerRow = I.step / I.elemSize();
-
    string kernelName = "lkSparse";
-
-
-    size_t localThreads[3]  = { 8, isImageSupported?8:32, 1 };
-    size_t globalThreads[3] = { 8 * ptcount, isImageSupported?8:32, 1};
-
+    size_t localThreads[3]  = { 8, 8, 1 };
+    size_t globalThreads[3] = { 8 * ptcount, 8, 1};
    int cn = I.oclchannels();
-
    char calcErr;
    if (level == 0)
    {
@@ -770,22 +595,11 @@ static void lkSparse_run(oclMat &I, oclMat &J,
    }

    vector<pair<size_t , const void *> > args;
-    cl_mem ITex;
-    cl_mem JTex;
-    if (isImageSupported)
-    {
-        ITex = bindTexture(I, I.depth(), cn);
-        JTex = bindTexture(J, J.depth(), cn);
-    }
-    else
-    {
-        ITex = (cl_mem)I.data;
-        JTex = (cl_mem)J.data;
-    }
+    cl_mem ITex = bindTexture(I);
+    cl_mem JTex = bindTexture(J);

    args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
-    //cl_mem clmD = clCreateBuffer(clCxt, CL_MEM_READ_WRITE, ptcount * sizeof(float), NULL, NULL);
    args.push_back( make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
@@ -795,10 +609,6 @@ static void lkSparse_run(oclMat &I, oclMat &J,
    args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
-    if (!isImageSupported)
-    {
-        args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
-    }
    args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
@@ -806,18 +616,20 @@ static void lkSparse_run(oclMat &I, oclMat &J,
    args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
-    //args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));

-    if (isImageSupported)
+    try
    {
        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
-
+    }
+    catch(Exception&)
+    {
+        printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
        releaseTexture(ITex);
        releaseTexture(JTex);
-    }
-    else
-    {
-        //printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
+        ITex = (cl_mem)I.data;
+        JTex = (cl_mem)J.data;
+        localThreads[1] = globalThreads[1] = 32;
+        args.insert( args.begin()+11, make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    }
 }
@@ -927,8 +739,6 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
    size_t localThreads[3]  = { 16, 16, 1 };
    size_t globalThreads[3] = { I.cols, I.rows, 1};

-    int cn = I.oclchannels();
-
    bool calcErr;
    if (err)
    {
@@ -944,8 +754,8 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,

    if (isImageSupported)
    {
-        ITex = bindTexture(I, I.depth(), cn);
-        JTex = bindTexture(J, J.depth(), cn);
+        ITex = bindTexture(I);
+        JTex = bindTexture(J);
    }
    else
    {
--- a/modules/ocl/test/test_gemm.cpp
+++ b/modules/ocl/test/test_gemm.cpp
@@ -81,5 +81,5 @@ TEST_P(Gemm, Accuracy)
 INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
-                            testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+                            testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
 #endif
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -183,12 +183,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
        if( count == 0 )
            break;

-        double icount = 1.0 / count;
-        int x1 = cvFloor(sx * icount);
-        int y1 = cvFloor(sy * icount);
-        s0 = cvFloor(s0 * icount);
-        s1 = cvFloor(s1 * icount);
-        s2 = cvFloor(s2 * icount);
+        int x1 = sx / count;
+        int y1 = sy / count;
+        s0 = s0 / count;
+        s1 = s1 / count;
+        s2 = s2 / count;

        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
@@ -1370,9 +1369,7 @@ TEST_P(meanShiftFiltering, Mat)
        gdst.download(cpu_gdst);

        char sss[1024];
-        char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
        sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
-        strcat(sss, warning);
        EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);

    }
@@ -1398,9 +1395,7 @@ TEST_P(meanShiftProc, Mat)
        gdstCoor.download(cpu_gdstCoor);

        char sss[1024];
-        char warning[300] = "Warning: If the selected device doesn't support double, a deviation will exist.\nIf the accuracy is acceptable, please ignore it.\n";
        sprintf(sss, "roicols=%d,roirows=%d,srcx=%d,srcy=%d,dstx=%d,dsty=%d\n", roicols, roirows, srcx, srcy, dstx, dsty);
-        strcat(sss, warning);
        EXPECT_MAT_NEAR(dst, cpu_gdst, 0.0, sss);
        EXPECT_MAT_NEAR(dstCoor, cpu_gdstCoor, 0.0, sss);
    }