restore cudaSafeCall
@@ -64,10 +64,12 @@ namespace cv { namespace gpu {
 }
 }}

-#if defined(__GNUC__)
-#define cvCudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-#define cvCudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+#ifndef cudaSafeCall
+#if defined(__GNUC__)
+#define cudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+#define cudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+#endif
 #endif

 namespace cv { namespace gpu
@@ -104,7 +106,7 @@ namespace cv { namespace gpu
 template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
 {
 cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
 }
 }
 }}
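For context on the macro restored in the first hunk above: every CUDA runtime call in the module is wrapped so that a non-cudaSuccess status is routed to cv::gpu::checkCudaError together with the call site's file, line, and (under GCC) function name. The body of checkCudaError is not part of this diff, so the error handling below is only an assumed stand-in (the real OpenCV hook reports through OpenCV's own error mechanism). A minimal self-contained sketch of the wrapper pattern:

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Assumed stand-in for cv::gpu::checkCudaError: print and abort on failure.
// OpenCV's real implementation reports through its own error mechanism instead.
static void checkCudaError(cudaError_t err, const char* file, int line, const char* func)
{
    if (err != cudaSuccess)
    {
        std::fprintf(stderr, "CUDA error: %s at %s:%d (%s)\n",
                     cudaGetErrorString(err), file, line, func);
        std::exit(EXIT_FAILURE);
    }
}

// Same shape as the guarded macro above: every runtime call goes through the check,
// and __func__ is only used where the compiler provides it.
#if defined(__GNUC__)
    #define cudaSafeCall(expr) checkCudaError(expr, __FILE__, __LINE__, __func__)
#else
    #define cudaSafeCall(expr) checkCudaError(expr, __FILE__, __LINE__, "")
#endif

int main()
{
    void* devPtr = 0;
    cudaSafeCall( cudaMalloc(&devPtr, 1024) );   // fails loudly with file/line info
    cudaSafeCall( cudaMemset(devPtr, 0, 1024) );
    cudaSafeCall( cudaFree(devPtr) );
    return 0;
}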
@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

 transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );

 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }

 template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

 transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );

 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<> struct TransformDispatcher<true>
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

 transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );

 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }

 template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

 transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );

 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 } // namespace transform_detail
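All four dispatcher hunks above use the same launch-check-synchronize pattern: launch the kernel, check cudaGetLastError() for launch failures, and synchronize (which surfaces execution errors) only when running on the default stream. A small standalone CUDA sketch of that pattern; the kernel and divUp helper here are illustrative stand-ins, not OpenCV code:

#include <cuda_runtime.h>

__global__ void scaleKernel(float* data, int n, float s)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= s;
}

static int divUp(int total, int grain) { return (total + grain - 1) / grain; }

// Mirrors the dispatcher pattern above: check the launch immediately, then
// synchronize (and check again) only on the default stream so async work stays async.
static cudaError_t scaleAsync(float* d_data, int n, float s, cudaStream_t stream)
{
    const dim3 threads(256);
    const dim3 grid(divUp(n, threads.x));

    scaleKernel<<<grid, threads, 0, stream>>>(d_data, n, s);
    cudaError_t err = cudaGetLastError();       // catches launch-configuration errors
    if (err != cudaSuccess)
        return err;

    if (stream == 0)
        err = cudaDeviceSynchronize();          // catches execution errors on the blocking path
    return err;
}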
@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cudev

 void writeScalar(const uchar* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
 }
 void writeScalar(const schar* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
 }
 void writeScalar(const ushort* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
 }
 void writeScalar(const short* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
 }
 void writeScalar(const int* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
 }
 void writeScalar(const float* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
 }
 void writeScalar(const double* vals)
 {
-cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
+cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
 }

 template<typename T>
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

 set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );

 if (stream == 0)
-cvCudaSafeCall ( cudaDeviceSynchronize() );
+cudaSafeCall ( cudaDeviceSynchronize() );
 }

 template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

 set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );

 if (stream == 0)
-cvCudaSafeCall ( cudaDeviceSynchronize() );
+cudaSafeCall ( cudaDeviceSynchronize() );
 }

 template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cudev
 template<typename T, typename D, typename S>
 void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
 {
-cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
-cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
+cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
+cudaSafeCall( cudaSetDoubleForDevice(&beta) );
 Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
 cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
 }
@@ -131,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
 if (err == cudaErrorNotReady || err == cudaSuccess)
 return err == cudaSuccess;

-cvCudaSafeCall(err);
+cudaSafeCall(err);
 return false;
 }

 void cv::gpu::Stream::waitForCompletion()
 {
 cudaStream_t stream = Impl::getStream(impl);
-cvCudaSafeCall( cudaStreamSynchronize(stream) );
+cudaSafeCall( cudaStreamSynchronize(stream) );
 }

 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
@@ -148,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)

 cudaStream_t stream = Impl::getStream(impl);
 size_t bwidth = src.cols * src.elemSize();
-cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }

 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
@@ -157,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)

 cudaStream_t stream = Impl::getStream(impl);
 size_t bwidth = src.cols * src.elemSize();
-cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }

 void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
@@ -166,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)

 cudaStream_t stream = Impl::getStream(impl);
 size_t bwidth = src.cols * src.elemSize();
-cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
@@ -175,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)

 cudaStream_t stream = Impl::getStream(impl);
 size_t bwidth = src.cols * src.elemSize();
-cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
@@ -184,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)

 cudaStream_t stream = Impl::getStream(impl);
 size_t bwidth = src.cols * src.elemSize();
-cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
@@ -201,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)

 if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
 {
-cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
+cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
 return;
 }

@@ -212,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
 if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
 {
 int ival = saturate_cast<uchar>(val[0]);
-cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
+cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
 return;
 }
 }
@@ -299,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat

 cudaStream_t stream = Impl::getStream(impl);

-cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
+cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
 #else
 (void) callback;
 (void) userData;
@@ -328,7 +328,7 @@ void cv::gpu::Stream::create()
 release();

 cudaStream_t stream;
-cvCudaSafeCall( cudaStreamCreate( &stream ) );
+cudaSafeCall( cudaStreamCreate( &stream ) );

 impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));

@@ -340,7 +340,7 @@ void cv::gpu::Stream::release()
 {
 if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
 {
-cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
+cudaSafeCall( cudaStreamDestroy(impl->stream) );
 cv::fastFree(impl);
 }
 }
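The Stream hunks above are almost all 2D pitched copies. For reference, the arguments being passed are destination pitch, source pitch, width in bytes (cols * elemSize()), and row count. A minimal sketch of the same cudaMemcpy2DAsync call outside OpenCV, with made-up names:

#include <cuda_runtime.h>

// Illustrative wrapper: copies 'rows' rows of 'widthBytes' bytes each from a pitched
// host buffer to a pitched device buffer on the given stream. In the hunks above the
// widths and pitches come from the Mat/GpuMat headers (bwidth, step).
cudaError_t uploadRoiAsync(void* dstDev, size_t dstPitch,
                           const void* srcHost, size_t srcPitch,
                           size_t widthBytes, size_t rows,
                           cudaStream_t stream)
{
    return cudaMemcpy2DAsync(dstDev, dstPitch, srcHost, srcPitch,
                             widthBytes, rows, cudaMemcpyHostToDevice, stream);
}

For the copy to actually overlap with other work, the host side should be page-locked memory, which is what the CudaMem and registerPageLocked changes later in this commit rely on.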
@@ -91,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
 if (error == cudaErrorNoDevice)
 return 0;

-cvCudaSafeCall( error );
+cudaSafeCall( error );
 return count;
 }

 void cv::gpu::setDevice(int device)
 {
-cvCudaSafeCall( cudaSetDevice( device ) );
+cudaSafeCall( cudaSetDevice( device ) );
 }

 int cv::gpu::getDevice()
 {
 int device;
-cvCudaSafeCall( cudaGetDevice( &device ) );
+cudaSafeCall( cudaGetDevice( &device ) );
 return device;
 }

 void cv::gpu::resetDevice()
 {
-cvCudaSafeCall( cudaDeviceReset() );
+cudaSafeCall( cudaDeviceReset() );
 }

 namespace
@@ -302,7 +302,7 @@ namespace
 if (!props_[devID])
 {
 props_[devID] = new cudaDeviceProp;
-cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
+cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
 }

 return props_[devID];
@@ -322,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
 if (prevDeviceID != device_id_)
 setDevice(device_id_);

-cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
+cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );

 if (prevDeviceID != device_id_)
 setDevice(prevDeviceID);
@@ -408,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
 printf("Device count: %d\n", count);

 int driverVersion = 0, runtimeVersion = 0;
-cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

 const char *computeMode[] = {
 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
@@ -423,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
 for(int dev = beg; dev < end; ++dev)
 {
 cudaDeviceProp prop;
-cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

 printf("\nDevice %d: \"%s\"\n", dev, prop.name);
 printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
@@ -485,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
 int end = valid ? device+1 : count;

 int driverVersion = 0, runtimeVersion = 0;
-cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

 for(int dev = beg; dev < end; ++dev)
 {
 cudaDeviceProp prop;
-cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

 const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
 printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
@@ -983,7 +983,7 @@ namespace

 nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -998,7 +998,7 @@ namespace

 nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };

@@ -1040,7 +1040,7 @@ namespace

 nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -1057,7 +1057,7 @@ namespace

 nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };

@@ -1088,7 +1088,7 @@ namespace

 nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -1105,7 +1105,7 @@ namespace

 nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };

@@ -1131,7 +1131,7 @@ namespace

 nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 };

@@ -1148,15 +1148,15 @@ namespace
 public:
 void copy(const Mat& src, GpuMat& dst) const
 {
-cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
+cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
 }
 void copy(const GpuMat& src, Mat& dst) const
 {
-cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
 }
 void copy(const GpuMat& src, GpuMat& dst) const
 {
-cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
+cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
 }

 void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
@@ -1301,7 +1301,7 @@ namespace
 {
 if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
 {
-cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
+cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
 return;
 }

@@ -1312,7 +1312,7 @@ namespace
 if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
 {
 int val = saturate_cast<uchar>(s[0]);
-cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
+cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
 return;
 }
 }
@@ -1367,7 +1367,7 @@ namespace

 void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
 {
-cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
+cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
 }

 void free(void* devPtr) const
@@ -191,18 +191,18 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return G

 void cv::gpu::registerPageLocked(Mat& m)
 {
-cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
+cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
 }

 void cv::gpu::unregisterPageLocked(Mat& m)
 {
-cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
+cudaSafeCall( cudaHostUnregister(m.ptr()) );
 }

 bool cv::gpu::CudaMem::canMapHostMemory()
 {
 cudaDeviceProp prop;
-cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
 return (prop.canMapHostMemory != 0) ? true : false;
 }

@@ -237,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
 if (_alloc_type == ALLOC_ZEROCOPY)
 {
 cudaDeviceProp prop;
-cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
 step = alignUpStep(step, prop.textureAlignment);
 }
 int64 _nettosize = (int64)step*rows;
@@ -252,9 +252,9 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)

 switch (alloc_type)
 {
-case ALLOC_PAGE_LOCKED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
-case ALLOC_ZEROCOPY: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
-case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
+case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
+case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
+case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
 default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
 }

@@ -273,7 +273,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
 GpuMat res;

 void *pdev;
-cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
+cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
 res = GpuMat(rows, cols, type(), pdev, step);

 return res;
@@ -283,7 +283,7 @@ void cv::gpu::CudaMem::release()
 {
 if( refcount && CV_XADD(refcount, -1) == 1 )
 {
-cvCudaSafeCall( cudaFreeHost(datastart ) );
+cudaSafeCall( cudaFreeHost(datastart ) );
 fastFree(refcount);
 }
 data = datastart = dataend = 0;
@@ -133,7 +133,7 @@ void cv::gpu::setGlDevice(int device)
 (void) device;
 throw_no_cuda();
 #else
-cvCudaSafeCall( cudaGLSetGLDevice(device) );
+cudaSafeCall( cudaGLSetGLDevice(device) );
 #endif
 #endif
 }
@@ -184,7 +184,7 @@ namespace
 return;

 cudaGraphicsResource_t resource;
-cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
+cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );

 release();

@@ -217,7 +217,7 @@ namespace
 CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
 {
 if (resource_)
-cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
+cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
 }

 CudaResource::GraphicsMapHolder::~GraphicsMapHolder()
@@ -240,14 +240,14 @@ namespace

 void* dst;
 size_t size;
-cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
+cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );

 CV_DbgAssert( width * height == size );

 if (stream == 0)
-cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
+cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
 else
-cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
 }

 void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)
@@ -259,14 +259,14 @@ namespace

 void* src;
 size_t size;
-cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
+cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );

 CV_DbgAssert( width * height == size );

 if (stream == 0)
-cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
+cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
 else
-cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
+cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
 }

 void* CudaResource::map(cudaStream_t stream)
@@ -277,7 +277,7 @@ namespace

 void* ptr;
 size_t size;
-cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
+cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );

 h.reset();