diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp
index 9daec2eaf..4c9980a4b 100644
--- a/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/util.hpp
@@ -65,9 +65,9 @@ namespace cv
             clMemcpyDeviceToDevice
         };
         ///////////////////////////OpenCL call wrappers////////////////////////////
+        // NOTE: the 5-argument openCLMallocPitch(clCxt, dev_ptr, pitch, widthInBytes, height) overload was removed;
+        // the former openCLMallocPitchEx now carries this name, so callers must pass rw_type and mem_type themselves.
         void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
-                                          size_t widthInBytes, size_t height);
-        void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
                                             size_t widthInBytes, size_t height,
                                             DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
         void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp
index 1a96f785e..7c097a2c8 100644
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -423,12 +423,6 @@ namespace cv
         }
 
         void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
-                               size_t widthInBytes, size_t height)
-        {
-            openCLMallocPitchEx(clCxt, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
-        }
-
-        void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
                                  size_t widthInBytes, size_t height,
                                  DevMemRW rw_type, DevMemType mem_type, void* hptr)
         {
diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp
index 25252eaac..4f23789b3 100644
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -183,11 +183,21 @@ void cv::ocl::oclMat::upload(const Mat &m)
         int pitch = wholeSize.width * 3 * m.elemSize1();
         int tail_padding = m.elemSize1() * 3072;
         int err;
-        cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
-                                     (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
-        openCLVerifyCall(err);
+        cl_mem temp;
+        if(gDeviceMemType!=DEVICE_MEM_UHP && gDeviceMemType!=DEVICE_MEM_CHP){ // no host pointer: create the buffer, then copy m's rows into it
+            temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
+                                  (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
+            openCLVerifyCall(err);
+            openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, 
+                           wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
+        }
+        else{ // DEVICE_MEM_UHP / DEVICE_MEM_CHP: build the buffer over m.datastart (CL_MEM_USE_HOST_PTR); no openCLMemcpy2D here
+            temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR,
+                                  (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, m.datastart, &err);
+            openCLVerifyCall(err);
+        }
 
-        openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
+        // NOTE(review): the host-pointer branch skips the m.step -> pitch repack and passes the padded size with m.datastart; presumably needs m.step == pitch and a host allocation >= that size - TODO confirm.
         convert_C3C4(temp, *this);
         openCLSafeCall(clReleaseMemObject(temp));
     }
@@ -915,7 +925,7 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
         size_t esz = elemSize();
 
         void *dev_ptr;
-        openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols),
+        openCLMallocPitch(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols),
                             rows, rw_type, mem_type, hptr);
 
         if (esz * cols == step)