Fixed the norm diff function (it now uses a pre-allocated buffer)
This commit is contained in:
		| @@ -187,10 +187,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) | |||||||
|     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); |     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); | ||||||
|     CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); |     CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); | ||||||
|  |  | ||||||
|     typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, | #if CUDA_VERSION < 5050 | ||||||
|         NppiSize oSizeROI, Npp64f* pRetVal); |     typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); | ||||||
|  |  | ||||||
|     static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; |     static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; | ||||||
|  | #else | ||||||
|  |     typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, | ||||||
|  |         NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer); | ||||||
|  |  | ||||||
|  |     typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize); | ||||||
|  |  | ||||||
|  |     static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; | ||||||
|  |  | ||||||
|  |     static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R}; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|     NppiSize sz; |     NppiSize sz; | ||||||
|     sz.width  = src1.cols; |     sz.width  = src1.cols; | ||||||
| @@ -202,7 +212,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) | |||||||
|  |  | ||||||
|     DeviceBuffer dbuf; |     DeviceBuffer dbuf; | ||||||
|  |  | ||||||
|     nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) ); | #if CUDA_VERSION < 5050 | ||||||
|  |     nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) ); | ||||||
|  | #else | ||||||
|  |     int bufSize; | ||||||
|  |     buf_size_funcs[funcIdx](sz, &bufSize); | ||||||
|  |  | ||||||
|  |     GpuMat buf(1, bufSize, CV_8UC1); | ||||||
|  |  | ||||||
|  |     nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) ); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|     cudaSafeCall( cudaDeviceSynchronize() ); |     cudaSafeCall( cudaDeviceSynchronize() ); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Vladislav Vinogradov
					Vladislav Vinogradov