Use host data when DEVICE_MEM_UHP is set (the risk of varying alignment sizes is borne by the user)

This commit is contained in:
yao 2013-04-13 14:58:49 +08:00
parent 3b364330ad
commit 40d0e0eda0
4 changed files with 31 additions and 47 deletions

View File

@ -263,8 +263,10 @@ namespace cv
void create(Size size, int type); void create(Size size, int type);
//! allocates new oclMatrix with specified device memory type. //! allocates new oclMatrix with specified device memory type.
void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type); void createEx(int rows, int cols, int type,
void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type); DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
void createEx(Size size, int type, DevMemRW rw_type,
DevMemType mem_type, void* hptr = 0);
//! decreases reference counter; //! decreases reference counter;
// deallocate the data when reference counter reaches 0. // deallocate the data when reference counter reaches 0.

View File

@ -68,7 +68,8 @@ namespace cv
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height); size_t widthInBytes, size_t height);
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); size_t widthInBytes, size_t height,
DevMemRW rw_type, DevMemType mem_type, void* hptr = 0);
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
const void *src, size_t spitch, const void *src, size_t spitch,
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);

View File

@ -163,7 +163,7 @@ namespace cv
{ {
releaseResources(); releaseResources();
delete this; delete this;
} }
} }
Impl* copy() Impl* copy()
@ -260,9 +260,8 @@ namespace cv
int setDevMemType(DevMemRW rw_type, DevMemType mem_type) int setDevMemType(DevMemRW rw_type, DevMemType mem_type)
{ {
if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) || if( (mem_type == DEVICE_MEM_PM &&
mem_type == DEVICE_MEM_UHP || Context::getContext()->impl->unified_memory == 0) )
mem_type == DEVICE_MEM_CHP )
return -1; return -1;
gDeviceMemRW = rw_type; gDeviceMemRW = rw_type;
gDeviceMemType = mem_type; gDeviceMemType = mem_type;
@ -432,11 +431,17 @@ namespace cv
} }
void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type) size_t widthInBytes, size_t height,
DevMemRW rw_type, DevMemType mem_type, void* hptr)
{ {
cl_int status; cl_int status;
*dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], if(hptr && (mem_type==DEVICE_MEM_UHP || mem_type==DEVICE_MEM_CHP))
widthInBytes * height, 0, &status); *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext,
gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
widthInBytes * height, hptr, &status);
else
*dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
widthInBytes * height, 0, &status);
openCLVerifyCall(status); openCLVerifyCall(status);
*pitch = widthInBytes; *pitch = widthInBytes;
} }

View File

@ -177,15 +177,9 @@ void cv::ocl::oclMat::upload(const Mat &m)
Size wholeSize; Size wholeSize;
Point ofs; Point ofs;
m.locateROI(wholeSize, ofs); m.locateROI(wholeSize, ofs);
// int type = m.type();
// if(m.oclchannels() == 3)
//{
// type = CV_MAKETYPE(m.depth(), 4);
//}
create(wholeSize, m.type());
if(m.channels() == 3) if(m.channels() == 3)
{ {
create(wholeSize, m.type());
int pitch = wholeSize.width * 3 * m.elemSize1(); int pitch = wholeSize.width * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072; int tail_padding = m.elemSize1() * 3072;
int err; int err;
@ -195,35 +189,20 @@ void cv::ocl::oclMat::upload(const Mat &m)
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
convert_C3C4(temp, *this); convert_C3C4(temp, *this);
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
// 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
//for(int i=0;i<wholeSize.height;i++)
//{
// int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
// for(int j=0;j<wholeSize.width;j++)
// {
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
// }
//}
//delete []cputemp;
//delete []cpudata;
openCLSafeCall(clReleaseMemObject(temp)); openCLSafeCall(clReleaseMemObject(temp));
} }
else else
{ {
openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice); // try to use host ptr
createEx(wholeSize, m.type(), gDeviceMemRW, gDeviceMemType, m.datastart);
if(gDeviceMemType!=DEVICE_MEM_UHP && gDeviceMemType!=DEVICE_MEM_CHP)
openCLMemcpy2D(clCxt, data, step, m.datastart, m.step,
wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
} }
rows = m.rows; rows = m.rows;
cols = m.cols; cols = m.cols;
offset = ofs.y * step + ofs.x * elemSize(); offset = ofs.y * step + ofs.x * elemSize();
//download_channels = m.channels();
} }
void cv::ocl::oclMat::download(cv::Mat &m) const void cv::ocl::oclMat::download(cv::Mat &m) const
@ -908,9 +887,10 @@ oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
} }
void cv::ocl::oclMat::createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type) void cv::ocl::oclMat::createEx(Size size, int type,
DevMemRW rw_type, DevMemType mem_type, void* hptr)
{ {
createEx(size.height, size.width, type, rw_type, mem_type); createEx(size.height, size.width, type, rw_type, mem_type, hptr);
} }
void cv::ocl::oclMat::create(int _rows, int _cols, int _type) void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
@ -918,16 +898,12 @@ void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType); createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
} }
void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type, DevMemType mem_type) void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
DevMemRW rw_type, DevMemType mem_type, void* hptr)
{ {
clCxt = Context::getContext(); clCxt = Context::getContext();
/* core logic */ /* core logic */
_type &= Mat::TYPE_MASK; _type &= Mat::TYPE_MASK;
//download_channels = CV_MAT_CN(_type);
//if(download_channels==3)
//{
// _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4);
//}
if( rows == _rows && cols == _cols && type() == _type && data ) if( rows == _rows && cols == _cols && type() == _type && data )
return; return;
if( data ) if( data )
@ -943,8 +919,8 @@ void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type
size_t esz = elemSize(); size_t esz = elemSize();
void *dev_ptr; void *dev_ptr;
openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type); openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols),
//openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows); rows, rw_type, mem_type, hptr);
if (esz * cols == step) if (esz * cols == step)
flags |= Mat::CONTINUOUS_FLAG; flags |= Mat::CONTINUOUS_FLAG;