added lightweight class DeviceBuffer to matrix_reductions.cpp
This commit is contained in:
parent
3c2d7b951a
commit
6a03be2632
@ -66,6 +66,42 @@ int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_nogpu(); return 0; }
|
||||
|
||||
#else
|
||||
|
||||
namespace
|
||||
{
|
||||
class DeviceBuffer
|
||||
{
|
||||
public:
|
||||
explicit DeviceBuffer(int count_ = 1) : count(count_)
|
||||
{
|
||||
cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
|
||||
}
|
||||
~DeviceBuffer()
|
||||
{
|
||||
cudaSafeCall( cudaFree(pdev) );
|
||||
}
|
||||
|
||||
operator double*() {return pdev;}
|
||||
|
||||
void download(double* hptr)
|
||||
{
|
||||
double hbuf;
|
||||
cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
|
||||
*hptr = hbuf;
|
||||
}
|
||||
void download(double** hptrs)
|
||||
{
|
||||
AutoBuffer<double, 2 * sizeof(double)> hbuf(count);
|
||||
cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
|
||||
for (int i = 0; i < count; ++i)
|
||||
*hptrs[i] = hbuf[i];
|
||||
}
|
||||
|
||||
private:
|
||||
double* pdev;
|
||||
int count;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// meanStdDev
|
||||
@ -80,18 +116,14 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
|
||||
|
||||
#if NPP_VERSION_MAJOR >= 4
|
||||
|
||||
GpuMat d_buf(1, 2, CV_64F);
|
||||
DeviceBuffer dbuf(2);
|
||||
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), src.step, sz, d_buf.ptr<double>(), d_buf.ptr<double>() + 1) );
|
||||
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), src.step, sz, dbuf, (double*)dbuf + 1) );
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
|
||||
double buf[2];
|
||||
|
||||
Mat _buf(1, 2, CV_64F, buf);
|
||||
d_buf.download(_buf);
|
||||
mean[0] = buf[0];
|
||||
stddev[0] = buf[1];
|
||||
double* ptrs[2] = {mean.val, stddev.val};
|
||||
dbuf.download(ptrs);
|
||||
|
||||
#else
|
||||
|
||||
@ -151,26 +183,21 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
|
||||
int funcIdx = normType >> 1;
|
||||
|
||||
double retVal;
|
||||
|
||||
#if NPP_VERSION_MAJOR >= 4
|
||||
|
||||
GpuMat d_buf(1, 1, CV_64F);
|
||||
DeviceBuffer dbuf;
|
||||
|
||||
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step,
|
||||
src2.ptr<Npp8u>(), src2.step,
|
||||
sz, d_buf.ptr<double>()) );
|
||||
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, sz, dbuf) );
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
|
||||
double retVal;
|
||||
Mat _buf(1, 1, CV_64F, &retVal);
|
||||
d_buf.download(_buf);
|
||||
dbuf.download(&retVal);
|
||||
|
||||
#else
|
||||
|
||||
double retVal;
|
||||
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step,
|
||||
src2.ptr<Npp8u>(), src2.step,
|
||||
sz, &retVal) );
|
||||
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, sz, &retVal) );
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user