restore cudaSafeCall

Vladislav Vinogradov
2013-04-08 12:37:36 +04:00
parent 2bad639aee
commit b50090f850
73 changed files with 807 additions and 805 deletions

View File

@@ -64,10 +64,12 @@ namespace cv { namespace gpu {
     }
 }}
 
-#if defined(__GNUC__)
-    #define cvCudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cvCudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+#ifndef cudaSafeCall
+    #if defined(__GNUC__)
+        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
+    #else /* defined(__CUDACC__) || defined(__MSVC__) */
+        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+    #endif
 #endif
 
 namespace cv { namespace gpu
@@ -104,7 +106,7 @@ namespace cv { namespace gpu
         template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
         {
             cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-            cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
         }
     }
 }}
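
As context for the rename: the restored macro simply forwards the call-site location to cv::gpu::checkCudaError. Below is a minimal compilable sketch of that behavior; the handler body is an assumed stand-in (the real one reports through OpenCV's error machinery), and only the macro shape mirrors the header above.

    #include <cuda_runtime.h>
    #include <cstdio>
    #include <cstdlib>

    // Stand-in error handler: the real cv::gpu::checkCudaError raises an
    // OpenCV error; printing and aborting keeps the sketch self-contained.
    static void checkCudaError(cudaError_t err, const char* file, int line, const char* func)
    {
        if (err != cudaSuccess)
        {
            std::fprintf(stderr, "CUDA error '%s' at %s:%d (%s)\n",
                         cudaGetErrorString(err), file, line, func);
            std::abort();
        }
    }

    #ifndef cudaSafeCall
        #define cudaSafeCall(expr) checkCudaError(expr, __FILE__, __LINE__, "")
    #endif

    int main()
    {
        void* p = 0;
        cudaSafeCall( cudaMalloc(&p, 1024) );  // failures surface with file/line context
        cudaSafeCall( cudaFree(p) );
        return 0;
    }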

View File

@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
 
             transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
 
             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
 
         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
 
             transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
 
             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 
     template<> struct TransformDispatcher<true>
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
 
             transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
 
             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
 
         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
 
             transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
 
             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 } // namespace transform_detail
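
Every hunk in this file follows the same launch idiom: validate the launch itself with cudaGetLastError(), then block with cudaDeviceSynchronize() only on the default stream. A standalone sketch of that idiom, assuming the cudaSafeCall macro from the sketch above and an illustrative kernel:

    #include <cuda_runtime.h>

    __global__ void fillKernel(int* dst, int n, int value)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = value;
    }

    void launchFill(int* dst, int n, int value, cudaStream_t stream)
    {
        const dim3 threads(256, 1, 1);
        const dim3 grid((n + threads.x - 1) / threads.x, 1, 1);

        fillKernel<<<grid, threads, 0, stream>>>(dst, n, value);
        cudaSafeCall( cudaGetLastError() );          // catches bad launch configurations

        if (stream == 0)                             // default stream keeps blocking semantics
            cudaSafeCall( cudaDeviceSynchronize() );
    }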

View File

@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cudev
     void writeScalar(const uchar* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
     }
 
     void writeScalar(const schar* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
     }
 
     void writeScalar(const ushort* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
     }
 
     void writeScalar(const short* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
     }
 
     void writeScalar(const int* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
     }
 
     void writeScalar(const float* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
     }
 
     void writeScalar(const double* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
     }
 
     template<typename T>
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
 
        set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
 
         if (stream == 0)
-            cvCudaSafeCall ( cudaDeviceSynchronize() );
+            cudaSafeCall ( cudaDeviceSynchronize() );
     }
 
     template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
 
         set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
 
         if (stream == 0)
-            cvCudaSafeCall ( cudaDeviceSynchronize() );
+            cudaSafeCall ( cudaDeviceSynchronize() );
     }
 
     template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cudev
     template<typename T, typename D, typename S>
     void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
     {
-        cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
-        cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
+        cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
+        cudaSafeCall( cudaSetDoubleForDevice(&beta) );
         Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
         cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
     }
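
The writeScalar overloads above stage a 4-channel scalar in __constant__ memory so the set-to kernels can read it cheaply. A reduced, single-type sketch of that mechanism (the kernel and names are illustrative, and cudaSafeCall is assumed as above):

    #include <cuda_runtime.h>

    __constant__ float scalar_32f[4];   // one value per channel, as in the diff

    void writeScalar32f(const float* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
    }

    // Every thread reads the scalar from constant memory (cached, broadcast-friendly).
    __global__ void setToScalar(float* mat, int cols, int rows, size_t step, int channels)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < cols * channels && y < rows)
        {
            float* row = (float*)((char*)mat + y * step);
            row[x] = scalar_32f[x % channels];
        }
    }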

View File

@@ -131,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
     if (err == cudaErrorNotReady || err == cudaSuccess)
         return err == cudaSuccess;
 
-    cvCudaSafeCall(err);
+    cudaSafeCall(err);
     return false;
 }
 
 void cv::gpu::Stream::waitForCompletion()
 {
     cudaStream_t stream = Impl::getStream(impl);
-    cvCudaSafeCall( cudaStreamSynchronize(stream) );
+    cudaSafeCall( cudaStreamSynchronize(stream) );
 }
 
 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
@@ -148,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }
 
 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
@@ -157,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }
 
 void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
@@ -166,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }
 
 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
@@ -175,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }
 
 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
@@ -184,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
 }
 
 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
@@ -201,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
     if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
     {
-        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
         return;
     }
@@ -212,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
     if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
     {
         int ival = saturate_cast<uchar>(val[0]);
-        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
         return;
     }
 }
@@ -299,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
     cudaStream_t stream = Impl::getStream(impl);
 
-    cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
+    cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
 #else
     (void) callback;
     (void) userData;
@@ -328,7 +328,7 @@ void cv::gpu::Stream::create()
         release();
 
     cudaStream_t stream;
-    cvCudaSafeCall( cudaStreamCreate( &stream ) );
+    cudaSafeCall( cudaStreamCreate( &stream ) );
 
     impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));
@@ -340,7 +340,7 @@ void cv::gpu::Stream::release()
 {
     if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
     {
-        cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
+        cudaSafeCall( cudaStreamDestroy(impl->stream) );
         cv::fastFree(impl);
     }
 }
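
Taken together, these hunks span the whole cv::gpu::Stream lifecycle: create, enqueue asynchronous work, attach a host callback, wait, destroy. A condensed sketch of the same sequence against the raw runtime API (cudaSafeCall assumed as above; the destination should be page-locked for the copy to run truly asynchronously):

    #include <cuda_runtime.h>
    #include <cstdio>

    static void CUDART_CB onDone(cudaStream_t, cudaError_t status, void*)
    {
        std::printf("stream finished: %s\n", cudaGetErrorString(status));
    }

    void demoStream(void* devSrc, void* pinnedDst, size_t bytes)
    {
        cudaStream_t stream;
        cudaSafeCall( cudaStreamCreate(&stream) );                       // create()

        cudaSafeCall( cudaMemcpyAsync(pinnedDst, devSrc, bytes,
                                      cudaMemcpyDeviceToHost, stream) ); // enqueueDownload
        cudaSafeCall( cudaStreamAddCallback(stream, onDone, 0, 0) );     // enqueueHostCallback

        cudaSafeCall( cudaStreamSynchronize(stream) );                   // waitForCompletion()
        cudaSafeCall( cudaStreamDestroy(stream) );                       // release()
    }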

View File

@@ -91,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
     if (error == cudaErrorNoDevice)
         return 0;
 
-    cvCudaSafeCall( error );
+    cudaSafeCall( error );
 
     return count;
 }
 
 void cv::gpu::setDevice(int device)
 {
-    cvCudaSafeCall( cudaSetDevice( device ) );
+    cudaSafeCall( cudaSetDevice( device ) );
 }
 
 int cv::gpu::getDevice()
 {
     int device;
-    cvCudaSafeCall( cudaGetDevice( &device ) );
+    cudaSafeCall( cudaGetDevice( &device ) );
     return device;
 }
 
 void cv::gpu::resetDevice()
 {
-    cvCudaSafeCall( cudaDeviceReset() );
+    cudaSafeCall( cudaDeviceReset() );
 }
 
 namespace
@@ -302,7 +302,7 @@ namespace
         if (!props_[devID])
         {
            props_[devID] = new cudaDeviceProp;
-            cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
+            cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
         }
 
         return props_[devID];
@@ -322,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
     if (prevDeviceID != device_id_)
         setDevice(device_id_);
 
-    cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
+    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
 
     if (prevDeviceID != device_id_)
         setDevice(prevDeviceID);
@@ -408,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
     printf("Device count: %d\n", count);
 
     int driverVersion = 0, runtimeVersion = 0;
-    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
 
     const char *computeMode[] = {
         "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
@@ -423,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
     for(int dev = beg; dev < end; ++dev)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
 
         printf("\nDevice %d: \"%s\"\n", dev, prop.name);
         printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
@@ -485,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
     int end = valid ? device+1 : count;
 
     int driverVersion = 0, runtimeVersion = 0;
-    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
 
     for(int dev = beg; dev < end; ++dev)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
 
         const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
         printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
@@ -983,7 +983,7 @@ namespace
             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 
     template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -998,7 +998,7 @@ namespace
            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
@@ -1040,7 +1040,7 @@ namespace
             nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 
     template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -1057,7 +1057,7 @@ namespace
             nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
@@ -1088,7 +1088,7 @@ namespace
             nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 
     template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -1105,7 +1105,7 @@ namespace
             nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
        }
     };
@@ -1131,7 +1131,7 @@ namespace
             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
 
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
@@ -1148,15 +1148,15 @@ namespace
     public:
         void copy(const Mat& src, GpuMat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
         }
 
         void copy(const GpuMat& src, Mat& dst) const
        {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
         }
 
         void copy(const GpuMat& src, GpuMat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
         }
 
         void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
@@ -1301,7 +1301,7 @@ namespace
     {
         if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
         {
-            cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
+            cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
             return;
         }
@@ -1312,7 +1312,7 @@ namespace
        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
         {
             int val = saturate_cast<uchar>(s[0]);
-            cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
+            cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
             return;
         }
     }
@@ -1367,7 +1367,7 @@ namespace
         void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
         {
-            cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
+            cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
         }
 
         void free(void* devPtr) const
View File

@@ -191,18 +191,18 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return GpuMat(); }
 
 void cv::gpu::registerPageLocked(Mat& m)
 {
-    cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
+    cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
 }
 
 void cv::gpu::unregisterPageLocked(Mat& m)
 {
-    cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
+    cudaSafeCall( cudaHostUnregister(m.ptr()) );
 }
 
 bool cv::gpu::CudaMem::canMapHostMemory()
 {
     cudaDeviceProp prop;
-    cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+    cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
     return (prop.canMapHostMemory != 0) ? true : false;
 }
@@ -237,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
     if (_alloc_type == ALLOC_ZEROCOPY)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
         step = alignUpStep(step, prop.textureAlignment);
     }
     int64 _nettosize = (int64)step*rows;
@@ -252,9 +252,9 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
     switch (alloc_type)
     {
-    case ALLOC_PAGE_LOCKED:    cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
-    case ALLOC_ZEROCOPY:       cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
-    case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
+    case ALLOC_PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
+    case ALLOC_ZEROCOPY:       cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
+    case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
     default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
     }
@@ -273,7 +273,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
     GpuMat res;
 
     void *pdev;
-    cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
+    cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
     res = GpuMat(rows, cols, type(), pdev, step);
 
     return res;
@@ -283,7 +283,7 @@ void cv::gpu::CudaMem::release()
 {
     if( refcount && CV_XADD(refcount, -1) == 1 )
     {
-        cvCudaSafeCall( cudaFreeHost(datastart ) );
+        cudaSafeCall( cudaFreeHost(datastart ) );
         fastFree(refcount);
     }
 
     data = datastart = dataend = 0;
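
The three ALLOC_* cases map directly onto cudaHostAlloc flags, and ALLOC_ZEROCOPY additionally exposes a device-side alias via cudaHostGetDevicePointer, which is what createGpuMatHeader() wraps. A sketch of just that zero-copy path (cudaSafeCall assumed as above):

    #include <cuda_runtime.h>

    void* allocZeroCopy(size_t datasize, void** pdev)
    {
        void* ptr = 0;
        cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); // ALLOC_ZEROCOPY

        // Device-visible alias of the same host allocation.
        cudaSafeCall( cudaHostGetDevicePointer(pdev, ptr, 0) );
        return ptr;   // release later with cudaSafeCall( cudaFreeHost(ptr) )
    }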

View File

@@ -133,7 +133,7 @@ void cv::gpu::setGlDevice(int device)
     (void) device;
     throw_no_cuda();
 #else
-    cvCudaSafeCall( cudaGLSetGLDevice(device) );
+    cudaSafeCall( cudaGLSetGLDevice(device) );
 #endif
 #endif
 }
@@ -184,7 +184,7 @@ namespace
             return;
 
         cudaGraphicsResource_t resource;
-        cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
+        cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
 
         release();
@@ -217,7 +217,7 @@ namespace
     CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
     {
         if (resource_)
-            cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
+            cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
     }
 
     CudaResource::GraphicsMapHolder::~GraphicsMapHolder()
@@ -240,14 +240,14 @@ namespace
 
         void* dst;
         size_t size;
-        cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
+        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
 
         CV_DbgAssert( width * height == size );
 
         if (stream == 0)
-            cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
         else
-            cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
+            cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
     }
 
     void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)
@@ -259,14 +259,14 @@ namespace
 
         void* src;
         size_t size;
-        cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
+        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
 
         CV_DbgAssert( width * height == size );
 
         if (stream == 0)
-            cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
         else
-            cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
+            cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
     }
 
     void* CudaResource::map(cudaStream_t stream)
@@ -277,7 +277,7 @@ namespace
 
         void* ptr;
         size_t size;
-        cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
+        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
 
         h.reset();
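
The CudaResource hunks implement a map / get-pointer / copy / unmap cycle over a registered GL buffer. A flattened sketch of the copyFrom path, assuming the resource was already registered with cudaGraphicsGLRegisterBuffer as in the hunk above, and cudaSafeCall as before:

    #include <cuda_runtime.h>
    #include <cuda_gl_interop.h>

    void copyFromDevice(cudaGraphicsResource_t resource,
                        const void* src, size_t spitch,
                        size_t width, size_t height, cudaStream_t stream)
    {
        cudaSafeCall( cudaGraphicsMapResources(1, &resource, stream) );

        void* dst = 0;
        size_t size = 0;
        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource) );

        if (stream == 0)
            cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch,
                                       width, height, cudaMemcpyDeviceToDevice) );
        else
            cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch,
                                            width, height, cudaMemcpyDeviceToDevice, stream) );

        cudaSafeCall( cudaGraphicsUnmapResources(1, &resource, stream) );
    }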