diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 9cd070999..16539b20a 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -622,6 +622,10 @@ namespace cv //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, Stream& stream = Stream::Null()); + //! builds spherical warping maps + CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s, + GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null()); + //! rotate 8bit single or four channel image //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC //! supports CV_8UC1, CV_8UC4 types @@ -721,12 +725,21 @@ namespace cv CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method); //! downsamples image - CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst, int k=2); + CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst); + + //! upsamples image + CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst); + + //! smoothes the source image and downsamples it + CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst); + + //! upsamples the source image and then smoothes it + CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst); //! performs linear blending of two images //! to avoid accuracy errors sum of weigths shouldn't be very close to zero CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, - GpuMat& result, Stream& stream = Stream::Null()); + GpuMat& result, Stream& stream = Stream::Null()); ////////////////////////////// Matrix reductions ////////////////////////////// diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index de7e865e0..28d4eabe5 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -647,4 +647,26 @@ namespace cv { namespace gpu { namespace mathfunc template void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, int thresh, int maxVal, int type, cudaStream_t stream); template void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream); template void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, double thresh, double maxVal, int type, cudaStream_t stream); + + + ////////////////////////////////////////////////////////////////////////// + // subtract + + template + class SubtractOp + { + public: + __device__ __forceinline__ T operator()(const T& l, const T& r) const + { + return l - r; + } + }; + + template + void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream) + { + transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, SubtractOp(), stream); + } + + template void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream); }}} diff --git a/modules/gpu/src/cuda/filters.cu b/modules/gpu/src/cuda/filters.cu index 5b12d9dd4..779da8725 100644 --- a/modules/gpu/src/cuda/filters.cu +++ b/modules/gpu/src/cuda/filters.cu @@ -224,6 +224,7 @@ namespace cv { namespace gpu { namespace filters template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); + template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); }}} @@ -275,7 +276,7 @@ namespace cv { namespace gpu { namespace filters dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); - B b(src.rows, src.step / src.elemSize()); + B b(src.rows, src.step); if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1)) { @@ -364,6 +365,7 @@ namespace cv { namespace gpu { namespace filters template void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); + template void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); template void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream); }}} diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index 5afb3dfb5..6ef4c4cbb 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -42,14 +42,6 @@ #include "internal_shared.hpp" -#ifndef CV_PI_F - #ifndef CV_PI - #define CV_PI_F 3.14159265f - #else - #define CV_PI_F ((float)CV_PI) - #endif -#endif - // Other values are not supported #define CELL_WIDTH 8 #define CELL_HEIGHT 8 @@ -776,4 +768,4 @@ static void resize_for_hog(const DevMem2D& src, DevMem2D dst, TEX& tex) void resize_8UC1(const DevMem2D& src, DevMem2D dst) { resize_for_hog (src, dst, resize8UC1_tex); } void resize_8UC4(const DevMem2D& src, DevMem2D dst) { resize_for_hog(src, dst, resize8UC4_tex); } -}}} \ No newline at end of file +}}} diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 82f578a00..c1e1ef44c 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -66,8 +66,8 @@ namespace cv { namespace gpu { namespace imgproc } } - __global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy, size_t map_step, - uchar* dst, size_t dst_step, int width, int height) + __global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy, + size_t map_step, uchar* dst, size_t dst_step, int width, int height) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -131,7 +131,7 @@ namespace cv { namespace gpu { namespace imgproc grid.x = divUp(dst.cols, threads.x); grid.y = divUp(dst.rows, threads.y); - tex_remap.filterMode = cudaFilterModeLinear; + tex_remap.filterMode = cudaFilterModeLinear; tex_remap.addressMode[0] = tex_remap.addressMode[1] = cudaAddressModeWrap; cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall( cudaBindTexture2D(0, tex_remap, src.data, desc, src.cols, src.rows, src.step) ); @@ -139,7 +139,7 @@ namespace cv { namespace gpu { namespace imgproc remap_1c<<>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaUnbindTexture(tex_remap) ); } @@ -151,9 +151,9 @@ namespace cv { namespace gpu { namespace imgproc grid.y = divUp(dst.rows, threads.y); remap_3c<<>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows); - cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + cudaSafeCall( cudaGetLastError() ); + cudaSafeCall( cudaDeviceSynchronize() ); } /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// @@ -768,6 +768,7 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall( cudaDeviceSynchronize() ); } + ////////////////////////////////////////////////////////////////////////// // mulSpectrums @@ -796,6 +797,7 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall( cudaDeviceSynchronize() ); } + ////////////////////////////////////////////////////////////////////////// // mulSpectrums_CONJ @@ -825,6 +827,7 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall( cudaDeviceSynchronize() ); } + ////////////////////////////////////////////////////////////////////////// // mulAndScaleSpectrums @@ -855,6 +858,7 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall( cudaDeviceSynchronize() ); } + ////////////////////////////////////////////////////////////////////////// // mulAndScaleSpectrums_CONJ @@ -885,34 +889,173 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall( cudaDeviceSynchronize() ); } + ///////////////////////////////////////////////////////////////////////// // downsample - template - __global__ void downsampleKernel(const PtrStep_ src, int rows, int cols, int k, PtrStep_ dst) + template + __global__ void downsampleKernel(const PtrStep_ src, DevMem2D_ dst) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x < cols && y < rows) - dst.ptr(y)[x] = src.ptr(y * k)[x * k]; + if (x < dst.cols && y < dst.rows) + { + int ch_x = x / cn; + dst.ptr(y)[x] = src.ptr(y*2)[ch_x*2*cn + x - ch_x*cn]; + } } - template - void downsampleCaller(const PtrStep_ src, int rows, int cols, int k, PtrStep_ dst) + template + void downsampleCaller(const DevMem2D src, DevMem2D dst) { - dim3 threads(16, 16); + dim3 threads(32, 8); + dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y)); + + downsampleKernel<<>>(DevMem2D_(src), DevMem2D_(dst)); + cudaSafeCall(cudaGetLastError()); + cudaSafeCall(cudaDeviceSynchronize()); + } + + + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + template void downsampleCaller(const DevMem2D src, DevMem2D dst); + + + ////////////////////////////////////////////////////////////////////////// + // upsample + + template + __global__ void upsampleKernel(const PtrStep_ src, DevMem2D_ dst) + { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < dst.cols && y < dst.rows) + { + int ch_x = x / cn; + T val = ((ch_x & 1) || (y & 1)) ? 0 : src.ptr(y/2)[ch_x/2*cn + x - ch_x*cn]; + dst.ptr(y)[x] = val; + } + } + + + template + void upsampleCaller(const DevMem2D src, DevMem2D dst) + { + dim3 threads(32, 8); + dim3 grid(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y)); + + upsampleKernel<<>>(DevMem2D_(src), DevMem2D_(dst)); + cudaSafeCall(cudaGetLastError()); + cudaSafeCall(cudaDeviceSynchronize()); + } + + + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + template void upsampleCaller(const DevMem2D src, DevMem2D dst); + + + ////////////////////////////////////////////////////////////////////////// + // buildWarpMaps + + namespace build_warp_maps + { + __constant__ float cr[9]; + __constant__ float crinv[9]; + __constant__ float cf, cs; + __constant__ float chalf_w, chalf_h; + } + + + class SphericalMapper + { + public: + static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y) + { + using namespace build_warp_maps; + + v /= cs; + u /= cs; + + float sinv = sinf(v); + float x_ = sinv * sinf(u); + float y_ = -cosf(v); + float z_ = sinv * cosf(u); + + float z; + x = crinv[0]*x_ + crinv[1]*y_ + crinv[2]*z_; + y = crinv[3]*x_ + crinv[4]*y_ + crinv[5]*z_; + z = crinv[6]*x_ + crinv[7]*y_ + crinv[8]*z_; + + x = cf*x/z + chalf_w; + y = cf*y/z + chalf_h; + } + }; + + + template + __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows, + PtrStepf map_x, PtrStepf map_y) + { + int du = blockIdx.x * blockDim.x + threadIdx.x; + int dv = blockIdx.y * blockDim.y + threadIdx.y; + if (du < cols && dv < rows) + { + float u = tl_u + du; + float v = tl_v + dv; + float x, y; + Mapper::mapBackward(u, v, x, y); + map_x.ptr(dv)[du] = x; + map_y.ptr(dv)[du] = y; + } + } + + + void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y, + const float r[9], const float rinv[9], float f, float s, + float half_w, float half_h, cudaStream_t stream) + { + cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr, r, 9*sizeof(float))); + cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::crinv, rinv, 9*sizeof(float))); + cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cf, &f, sizeof(float))); + cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cs, &s, sizeof(float))); + cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::chalf_w, &half_w, sizeof(float))); + cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::chalf_h, &half_h, sizeof(float))); + + int cols = map_x.cols; + int rows = map_x.rows; + + dim3 threads(32, 8); dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - downsampleKernel<<>>(src, rows, cols, k, dst); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); + buildWarpMapsKernel<<>>(tl_u, tl_v, cols, rows, map_x, map_y); + cudaSafeCall(cudaGetLastError()); + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); } - template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst); - template void downsampleCaller(const PtrStepf src, int rows, int cols, int k, PtrStepf dst); }}} diff --git a/modules/gpu/src/cuda/internal_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp index 6e3f54ec2..860d62715 100644 --- a/modules/gpu/src/cuda/internal_shared.hpp +++ b/modules/gpu/src/cuda/internal_shared.hpp @@ -49,6 +49,14 @@ #include "npp.h" #include "NPP_staging.hpp" +#ifndef CV_PI_F + #ifndef CV_PI + #define CV_PI_F 3.14159265f + #else + #define CV_PI_F ((float)CV_PI) + #endif +#endif + namespace cv { namespace gpu diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 3fdba4a48..c392df7c0 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -174,9 +174,22 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream)); } +namespace cv { namespace gpu { namespace mathfunc +{ + template + void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream); +}}} + void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) { - nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, StreamAccessor::getStream(stream)); + if (src1.depth() == CV_16S && src2.depth() == CV_16S) + { + CV_Assert(src1.size() == src2.size()); + dst.create(src1.size(), src1.type()); + mathfunc::subtractCaller(src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream)); + } + else + nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, StreamAccessor::getStream(stream)); } void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) @@ -755,4 +768,4 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double return thresh; } -#endif \ No newline at end of file +#endif diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp index 61e15e273..97a4d063d 100644 --- a/modules/gpu/src/filtering.cpp +++ b/modules/gpu/src/filtering.cpp @@ -192,7 +192,8 @@ namespace Size src_size = src.size(); dst.create(src_size, dstType); - dstBuf.create(src_size, bufType); + ensureSizeIsEnough(src_size, bufType, dstBuf); + //dstBuf.create(src_size, bufType); if (stream) { @@ -717,7 +718,7 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_16SC1 || srcType == CV_16SC2 - || srcType == CV_32SC1 || srcType == CV_32FC1); + || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1); CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType)); @@ -747,6 +748,9 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, case CV_16SC2: func = filters::linearRowFilter_gpu; break; + case CV_16SC3: + func = filters::linearRowFilter_gpu; + break; case CV_32SC1: func = filters::linearRowFilter_gpu; break; @@ -827,8 +831,8 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds int gpuBorderType; CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); - CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC1 || dstType == CV_16SC2 - || dstType == CV_32SC1 || dstType == CV_32FC1); + CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC1 || dstType == CV_16SC2 + || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1); CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType)); @@ -858,6 +862,9 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds case CV_16SC2: func = filters::linearColumnFilter_gpu; break; + case CV_16SC3: + func = filters::linearColumnFilter_gpu; + break; case CV_32SC1: func = filters::linearColumnFilter_gpu; break; diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 38a2c3520..85e99f8c6 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -56,6 +56,8 @@ void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); } void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); } void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); } +void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, double, double, + GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); } void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } @@ -76,7 +78,11 @@ void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); } void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); } -void cv::gpu::downsample(const GpuMat&, GpuMat&, int) { throw_nogpu(); } +void cv::gpu::downsample(const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::upsample(const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::pyrDown(const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::pyrUp(const GpuMat&, GpuMat&) { throw_nogpu(); } + #else /* !defined (HAVE_CUDA) */ @@ -504,6 +510,30 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size nppWarpCaller(src, dst, coeffs, dsize, flags, npp_warpPerspective_8u, npp_warpPerspective_16u, npp_warpPerspective_32s, npp_warpPerspective_32f, StreamAccessor::getStream(s)); } + +////////////////////////////////////////////////////////////////////////////// +// buildWarpSphericalMaps + +namespace cv { namespace gpu { namespace imgproc +{ + void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y, + const float r[9], const float rinv[9], float f, float s, + float half_w, float half_h, cudaStream_t stream); +}}} + +void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s, + GpuMat& map_x, GpuMat& map_y, Stream& stream) +{ + CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F); + Mat Rinv = R.inv(); + CV_Assert(Rinv.isContinuous()); + + map_x.create(dst_roi.size(), CV_32F); + map_y.create(dst_roi.size(), CV_32F); + imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr(), Rinv.ptr(), + f, s, 0.5f*src_size.width, 0.5f*src_size.height, StreamAccessor::getStream(stream)); +} + //////////////////////////////////////////////////////////////////////// // rotate @@ -1333,32 +1363,96 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, cufftSafeCall(cufftDestroy(planC2R)); } + //////////////////////////////////////////////////////////////////// // downsample namespace cv { namespace gpu { namespace imgproc { - template - void downsampleCaller(const PtrStep_ src, int rows, int cols, int k, PtrStep_ dst); + template + void downsampleCaller(const DevMem2D src, DevMem2D dst); }}} -void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, int k) + +void cv::gpu::downsample(const GpuMat& src, GpuMat& dst) { - CV_Assert(src.channels() == 1); + CV_Assert(src.depth() < CV_64F && src.channels() <= 4); - dst.create((src.rows + k - 1) / k, (src.cols + k - 1) / k, src.type()); + typedef void (*Caller)(const DevMem2D, DevMem2D); + static const Caller callers[6][4] = + {{imgproc::downsampleCaller, imgproc::downsampleCaller, + imgproc::downsampleCaller, imgproc::downsampleCaller}, + {0,0,0,0}, {0,0,0,0}, + {imgproc::downsampleCaller, imgproc::downsampleCaller, + imgproc::downsampleCaller, imgproc::downsampleCaller}, + {0,0,0,0}, + {imgproc::downsampleCaller, imgproc::downsampleCaller, + imgproc::downsampleCaller, imgproc::downsampleCaller}}; - switch (src.depth()) - { - case CV_8U: - imgproc::downsampleCaller(src, dst.rows, dst.cols, k, dst); - break; - case CV_32F: - imgproc::downsampleCaller(src, dst.rows, dst.cols, k, dst); - break; - default: - CV_Error(CV_StsUnsupportedFormat, "bad image depth in downsample function"); - } + Caller caller = callers[src.depth()][src.channels()-1]; + if (!caller) + CV_Error(CV_StsUnsupportedFormat, "bad number of channels"); + + dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); + caller(src, dst.reshape(1)); +} + + +////////////////////////////////////////////////////////////////////////////// +// upsample + +namespace cv { namespace gpu { namespace imgproc +{ + template + void upsampleCaller(const DevMem2D src, DevMem2D dst); +}}} + + +void cv::gpu::upsample(const GpuMat& src, GpuMat& dst) +{ + CV_Assert(src.depth() < CV_64F && src.channels() <= 4); + + typedef void (*Caller)(const DevMem2D, DevMem2D); + static const Caller callers[6][5] = + {{imgproc::upsampleCaller, imgproc::upsampleCaller, + imgproc::upsampleCaller, imgproc::upsampleCaller}, + {0,0,0,0}, {0,0,0,0}, + {imgproc::upsampleCaller, imgproc::upsampleCaller, + imgproc::upsampleCaller, imgproc::upsampleCaller}, + {0,0,0,0}, + {imgproc::upsampleCaller, imgproc::upsampleCaller, + imgproc::upsampleCaller, imgproc::upsampleCaller}}; + + Caller caller = callers[src.depth()][src.channels()-1]; + if (!caller) + CV_Error(CV_StsUnsupportedFormat, "bad number of channels"); + + dst.create(src.rows*2, src.cols*2, src.type()); + caller(src, dst.reshape(1)); +} + + +////////////////////////////////////////////////////////////////////////////// +// pyrDown + +void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst) +{ + Mat ker = getGaussianKernel(5, 0, std::max(CV_32F, src.depth())); + GpuMat buf; + sepFilter2D(src, buf, src.depth(), ker, ker); + downsample(buf, dst); +} + + +////////////////////////////////////////////////////////////////////////////// +// pyrUp + +void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst) +{ + GpuMat buf; + upsample(src, buf); + Mat ker = getGaussianKernel(5, 0, std::max(CV_32F, src.depth())) * 2; + sepFilter2D(buf, dst, buf.depth(), ker, ker); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index 3d802807d..cc2537d9f 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -594,8 +594,9 @@ void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m) void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m) { if (m.type() == type && m.rows >= rows && m.cols >= cols) - return; - m.create(rows, cols, type); + m = m(Rect(0, 0, cols, rows)); + else + m.create(rows, cols, type); } diff --git a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp index e6adbe6ac..346ff847c 100644 --- a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp +++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp @@ -104,13 +104,13 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ D at_low(int i, const T* data) const { - return saturate_cast(data[idx_low(i) * step]); + return saturate_cast(*(const D*)((const char*)data + idx_low(i)*step)); } template __device__ __forceinline__ D at_high(int i, const T* data) const { - return saturate_cast(data[idx_high(i) * step]); + return saturate_cast(*(const D*)((const char*)data + idx_high(i)*step)); } private: @@ -174,13 +174,13 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ D at_low(int i, const T* data) const { - return saturate_cast(data[idx_low(i) * step]); + return saturate_cast(*(const D*)((const char*)data + idx_low(i)*step)); } template __device__ __forceinline__ D at_high(int i, const T* data) const { - return saturate_cast(data[idx_high(i) * step]); + return saturate_cast(*(const D*)((const char*)data + idx_high(i)*step)); } private: @@ -222,13 +222,13 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ D at_low(int i, const T* data) const { - return i >= 0 ? saturate_cast(data[i * step]) : val; + return i >= 0 ? saturate_cast(*(const D*)((const char*)data + i*step)) : val; } template __device__ __forceinline__ D at_high(int i, const T* data) const { - return i < len ? saturate_cast(data[i * step]) : val; + return i < len ? saturate_cast(*(const D*)((const char*)data + i*step)) : val; } bool is_range_safe(int mini, int maxi) const @@ -241,6 +241,25 @@ namespace cv { namespace gpu { namespace device int step; D val; }; + + + template + struct BrdConstant + { + BrdConstant(int w, int h, const OutT &val = VecTraits::all(0)) : w(w), h(h), val(val) {} + + __device__ __forceinline__ OutT at(int x, int y, const uchar* data, int step) const + { + if (x >= 0 && x <= w - 1 && y >= 0 && y <= h - 1) + return ((const OutT*)(data + y * step))[x]; + return val; + } + + private: + int w, h; + OutT val; + }; + }}} #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index 836f2de42..d177334f0 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -1372,99 +1372,6 @@ TEST_P(ReprojectImageTo3D, Accuracy) INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::ValuesIn(devices())); -//////////////////////////////////////////////////////////////////////////////// -// Downsample - -struct Downsample : testing::TestWithParam< std::tr1::tuple > -{ - cv::gpu::DeviceInfo devInfo; - int k; - - cv::Size size; - - cv::Size dst_gold_size; - - virtual void SetUp() - { - devInfo = std::tr1::get<0>(GetParam()); - k = std::tr1::get<1>(GetParam()); - - cv::gpu::setDevice(devInfo.deviceID()); - - cv::RNG& rng = cvtest::TS::ptr()->get_rng(); - - size = cv::Size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000); - - dst_gold_size = cv::Size((size.width + k - 1) / k, (size.height + k - 1) / k); - } -}; - -TEST_P(Downsample, Accuracy8U) -{ - PRINT_PARAM(devInfo); - PRINT_PARAM(size); - PRINT_PARAM(k); - - cv::RNG& rng = cvtest::TS::ptr()->get_rng(); - - cv::Mat src = cvtest::randomMat(rng, size, CV_8U, 0, 255, false); - - cv::Mat dst; - - ASSERT_NO_THROW( - cv::gpu::GpuMat gpures; - cv::gpu::downsample(cv::gpu::GpuMat(src), gpures, k); - gpures.download(dst); - ); - - ASSERT_EQ(dst_gold_size, dst.size()); - - for (int y = 0; y < dst.rows; ++y) - { - for (int x = 0; x < dst.cols; ++x) - { - int gold = src.at(y * k, x * k); - int res = dst.at(y, x); - ASSERT_EQ(gold, res); - } - } -} - -TEST_P(Downsample, Accuracy32F) -{ - PRINT_PARAM(devInfo); - PRINT_PARAM(size); - PRINT_PARAM(k); - - cv::RNG& rng = cvtest::TS::ptr()->get_rng(); - - cv::Mat src = cvtest::randomMat(rng, size, CV_32F, 0, 1.0, false); - - cv::Mat dst; - - ASSERT_NO_THROW( - cv::gpu::GpuMat gpures; - cv::gpu::downsample(cv::gpu::GpuMat(src), gpures, k); - gpures.download(dst); - ); - - ASSERT_EQ(dst_gold_size, dst.size()); - - for (int y = 0; y < dst.rows; ++y) - { - for (int x = 0; x < dst.cols; ++x) - { - float gold = src.at(y * k, x * k); - float res = dst.at(y, x); - ASSERT_FLOAT_EQ(gold, res); - } - } -} - -INSTANTIATE_TEST_CASE_P(ImgProc, Downsample, testing::Combine( - testing::ValuesIn(devices()), - testing::Range(2, 6))); - //////////////////////////////////////////////////////////////////////////////// // meanShift diff --git a/modules/stitching/blenders.cpp b/modules/stitching/blenders.cpp index 5797245ca..b41fa4593 100644 --- a/modules/stitching/blenders.cpp +++ b/modules/stitching/blenders.cpp @@ -47,14 +47,14 @@ using namespace cv; static const float WEIGHT_EPS = 1e-5f; -Ptr Blender::createDefault(int type) +Ptr Blender::createDefault(int type, bool try_gpu) { if (type == NO) return new Blender(); if (type == FEATHER) return new FeatherBlender(); if (type == MULTI_BAND) - return new MultiBandBlender(); + return new MultiBandBlender(try_gpu); CV_Error(CV_StsBadArg, "unsupported blending method"); return NULL; } @@ -153,6 +153,13 @@ void FeatherBlender::blend(Mat &dst, Mat &dst_mask) } +MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands) +{ + setNumBands(num_bands); + can_use_gpu_ = try_gpu && gpu::getCudaEnabledDeviceCount(); +} + + void MultiBandBlender::prepare(Rect dst_roi) { dst_roi_final_ = dst_roi; @@ -222,14 +229,14 @@ void MultiBandBlender::feed(const Mat &img, const Mat &mask, Point tl) int right = br_new.x - tl.x - img.cols; // Create the source image Laplacian pyramid - vector src_pyr_gauss(num_bands_ + 1); - copyMakeBorder(img, src_pyr_gauss[0], top, bottom, left, right, + Mat img_with_border; + copyMakeBorder(img, img_with_border, top, bottom, left, right, BORDER_REFLECT); - for (int i = 0; i < num_bands_; ++i) - pyrDown(src_pyr_gauss[i], src_pyr_gauss[i + 1]); vector src_pyr_laplace; - createLaplacePyr(src_pyr_gauss, src_pyr_laplace); - src_pyr_gauss.clear(); + if (can_use_gpu_) + createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace); + else + createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace); // Create the weight map Gaussian pyramid Mat weight_map; @@ -267,7 +274,7 @@ void MultiBandBlender::feed(const Mat &img, const Mat &mask, Point tl) } x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; - } + } } @@ -319,21 +326,43 @@ void createWeightMap(const Mat &mask, float sharpness, Mat &weight) } -void createLaplacePyr(const vector &pyr_gauss, vector &pyr_laplace) +void createLaplacePyr(const Mat &img, int num_levels, vector &pyr) { - if (pyr_gauss.size() == 0) - return; - pyr_laplace.resize(pyr_gauss.size()); + pyr.resize(num_levels + 1); + pyr[0] = img; + for (int i = 0; i < num_levels; ++i) + pyrDown(pyr[i], pyr[i + 1]); Mat tmp; - for (size_t i = 0; i < pyr_laplace.size() - 1; ++i) + for (int i = 0; i < num_levels; ++i) { - pyrUp(pyr_gauss[i + 1], tmp, pyr_gauss[i].size()); - subtract(pyr_gauss[i], tmp, pyr_laplace[i]); + pyrUp(pyr[i + 1], tmp, pyr[i].size()); + subtract(pyr[i], tmp, pyr[i]); } - pyr_laplace[pyr_laplace.size() - 1] = pyr_gauss[pyr_laplace.size() - 1].clone(); } +void createLaplacePyrGpu(const Mat &img, int num_levels, vector &pyr) +{ + pyr.resize(num_levels + 1); + + vector gpu_pyr(num_levels + 1); + gpu_pyr[0] = img; + for (int i = 0; i < num_levels; ++i) + gpu::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]); + + gpu::GpuMat tmp; + for (int i = 0; i < num_levels; ++i) + { + gpu::pyrUp(gpu_pyr[i + 1], tmp); + gpu::subtract(gpu_pyr[i], tmp, gpu_pyr[i]); + pyr[i] = gpu_pyr[i]; + } + + pyr[num_levels] = gpu_pyr[num_levels]; +} + + + void restoreImageFromLaplacePyr(vector &pyr) { if (pyr.size() == 0) diff --git a/modules/stitching/blenders.hpp b/modules/stitching/blenders.hpp index a6fd0eceb..04ede3cfb 100644 --- a/modules/stitching/blenders.hpp +++ b/modules/stitching/blenders.hpp @@ -38,77 +38,79 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // -//M*/ -#ifndef __OPENCV_BLENDERS_HPP__ -#define __OPENCV_BLENDERS_HPP__ - -#include "precomp.hpp" - -// Simple blender which puts one image over another -class Blender -{ -public: - enum { NO, FEATHER, MULTI_BAND }; - static cv::Ptr createDefault(int type); - - void prepare(const std::vector &corners, const std::vector &sizes); - virtual void prepare(cv::Rect dst_roi); - virtual void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl); - virtual void blend(cv::Mat &dst, cv::Mat &dst_mask); - -protected: - cv::Mat dst_, dst_mask_; - cv::Rect dst_roi_; -}; - - -class FeatherBlender : public Blender -{ -public: - FeatherBlender(float sharpness = 0.02f) { setSharpness(sharpness); } - float sharpness() const { return sharpness_; } - void setSharpness(float val) { sharpness_ = val; } - - void prepare(cv::Rect dst_roi); - void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl); - void blend(cv::Mat &dst, cv::Mat &dst_mask); - -private: - float sharpness_; - cv::Mat weight_map_; - cv::Mat dst_weight_map_; -}; - - -class MultiBandBlender : public Blender -{ -public: - MultiBandBlender(int num_bands = 5) { setNumBands(num_bands); } - int numBands() const { return actual_num_bands_; } - void setNumBands(int val) { actual_num_bands_ = val; } - - void prepare(cv::Rect dst_roi); - void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl); - void blend(cv::Mat &dst, cv::Mat &dst_mask); - -private: - int actual_num_bands_, num_bands_; - std::vector dst_pyr_laplace_; - std::vector dst_band_weights_; - cv::Rect dst_roi_final_; -}; - - -////////////////////////////////////////////////////////////////////////////// -// Auxiliary functions - -void normalize(const cv::Mat& weight, cv::Mat& src); - -void createWeightMap(const cv::Mat& mask, float sharpness, cv::Mat& weight); - -void createLaplacePyr(const std::vector& pyr_gauss, std::vector& pyr_laplace); - -// Restores source image in-place (result will be stored in pyr[0]) -void restoreImageFromLaplacePyr(std::vector& pyr); - -#endif // __OPENCV_BLENDERS_HPP__ +//M*/ +#ifndef __OPENCV_BLENDERS_HPP__ +#define __OPENCV_BLENDERS_HPP__ + +#include "precomp.hpp" + +// Simple blender which puts one image over another +class Blender +{ +public: + enum { NO, FEATHER, MULTI_BAND }; + static cv::Ptr createDefault(int type, bool try_gpu = false); + + void prepare(const std::vector &corners, const std::vector &sizes); + virtual void prepare(cv::Rect dst_roi); + virtual void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl); + virtual void blend(cv::Mat &dst, cv::Mat &dst_mask); + +protected: + cv::Mat dst_, dst_mask_; + cv::Rect dst_roi_; +}; + + +class FeatherBlender : public Blender +{ +public: + FeatherBlender(float sharpness = 0.02f) { setSharpness(sharpness); } + float sharpness() const { return sharpness_; } + void setSharpness(float val) { sharpness_ = val; } + + void prepare(cv::Rect dst_roi); + void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl); + void blend(cv::Mat &dst, cv::Mat &dst_mask); + +private: + float sharpness_; + cv::Mat weight_map_; + cv::Mat dst_weight_map_; +}; + + +class MultiBandBlender : public Blender +{ +public: + MultiBandBlender(int try_gpu = false, int num_bands = 5); + int numBands() const { return actual_num_bands_; } + void setNumBands(int val) { actual_num_bands_ = val; } + + void prepare(cv::Rect dst_roi); + void feed(const cv::Mat &img, const cv::Mat &mask, cv::Point tl); + void blend(cv::Mat &dst, cv::Mat &dst_mask); + +private: + int actual_num_bands_, num_bands_; + std::vector dst_pyr_laplace_; + std::vector dst_band_weights_; + cv::Rect dst_roi_final_; + bool can_use_gpu_; +}; + + +////////////////////////////////////////////////////////////////////////////// +// Auxiliary functions + +void normalize(const cv::Mat& weight, cv::Mat& src); + +void createWeightMap(const cv::Mat& mask, float sharpness, cv::Mat& weight); + +void createLaplacePyr(const cv::Mat &img, int num_levels, std::vector& pyr); +void createLaplacePyrGpu(const cv::Mat &img, int num_levels, std::vector& pyr); + +// Restores source image in-place (result will be stored in pyr[0]) +void restoreImageFromLaplacePyr(std::vector& pyr); + +#endif // __OPENCV_BLENDERS_HPP__ diff --git a/modules/stitching/main.cpp b/modules/stitching/main.cpp index 7c4922887..defb32b34 100644 --- a/modules/stitching/main.cpp +++ b/modules/stitching/main.cpp @@ -40,7 +40,7 @@ // //M*/ -// We follow to methods described in these two papers: +// We follow to these papers: // 1) Construction of panoramic mosaics with global and local alignment. // Heung-Yeung Shum and Richard Szeliski. 2000. // 2) Eliminating Ghosting and Exposure Artifacts in Image Mosaics. @@ -461,7 +461,7 @@ int main(int argc, char* argv[]) // Warp images and their masks Ptr warper = Warper::createByCameraFocal(static_cast(warped_image_scale * seam_work_aspect), - warp_type); + warp_type, try_gpu); for (int i = 0; i < num_images; ++i) { corners[i] = warper->warp(images[i], static_cast(cameras[i].focal * seam_work_aspect), @@ -522,7 +522,7 @@ int main(int argc, char* argv[]) // Update warped image scale warped_image_scale *= static_cast(compose_work_aspect); - warper = Warper::createByCameraFocal(warped_image_scale, warp_type); + warper = Warper::createByCameraFocal(warped_image_scale, warp_type, try_gpu); // Update corners and sizes for (int i = 0; i < num_images; ++i) @@ -565,19 +565,19 @@ int main(int argc, char* argv[]) img_warped.convertTo(img_warped_s, CV_16S); img_warped.release(); img.release(); - mask.release(); + mask.release(); dilate(masks_warped[img_idx], dilated_mask, Mat()); resize(dilated_mask, seam_mask, mask_warped.size()); mask_warped = seam_mask & mask_warped; if (static_cast(blender) == 0) - { - blender = Blender::createDefault(blend_type); + { + blender = Blender::createDefault(blend_type, try_gpu); Size dst_sz = resultRoi(corners, sizes).size(); float blend_width = sqrt(static_cast(dst_sz.area())) * blend_strength / 100.f; if (blend_width < 1.f) - blender = Blender::createDefault(Blender::NO); + blender = Blender::createDefault(Blender::NO, try_gpu); else if (blend_type == Blender::MULTI_BAND) { MultiBandBlender* mb = dynamic_cast(static_cast(blender)); @@ -594,7 +594,7 @@ int main(int argc, char* argv[]) } // Blend the current image - blender->feed(img_warped_s, mask_warped, corners[img_idx]); + blender->feed(img_warped_s, mask_warped, corners[img_idx]); } Mat result, result_mask; diff --git a/modules/stitching/matchers.cpp b/modules/stitching/matchers.cpp index 07c8fb958..9a7c0e12e 100644 --- a/modules/stitching/matchers.cpp +++ b/modules/stitching/matchers.cpp @@ -257,15 +257,7 @@ void FeaturesMatcher::operator ()(const vector &features, vector< namespace { - class PairLess - { - public: - bool operator()(const pair& l, const pair& r) const - { - return l.first < r.first || (l.first == r.first && l.second < r.second); - } - }; - typedef set,PairLess> MatchesSet; + typedef set > MatchesSet; // These two classes are aimed to find features matches only, not to // estimate homography diff --git a/modules/stitching/warpers.cpp b/modules/stitching/warpers.cpp index 9487c54d6..68916efd1 100644 --- a/modules/stitching/warpers.cpp +++ b/modules/stitching/warpers.cpp @@ -38,111 +38,133 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // -//M*/ -#include "warpers.hpp" - -using namespace std; -using namespace cv; - -Ptr Warper::createByCameraFocal(float focal, int type) -{ - if (type == PLANE) - return new PlaneWarper(focal); - if (type == CYLINDRICAL) - return new CylindricalWarper(focal); - if (type == SPHERICAL) - return new SphericalWarper(focal); - CV_Error(CV_StsBadArg, "unsupported warping type"); - return NULL; -} - - -void ProjectorBase::setTransformation(const Mat &R) -{ - CV_Assert(R.size() == Size(3, 3)); - CV_Assert(R.type() == CV_32F); - r[0] = R.at(0, 0); r[1] = R.at(0, 1); r[2] = R.at(0, 2); - r[3] = R.at(1, 0); r[4] = R.at(1, 1); r[5] = R.at(1, 2); - r[6] = R.at(2, 0); r[7] = R.at(2, 1); r[8] = R.at(2, 2); - - Mat Rinv = R.inv(); - rinv[0] = Rinv.at(0, 0); rinv[1] = Rinv.at(0, 1); rinv[2] = Rinv.at(0, 2); - rinv[3] = Rinv.at(1, 0); rinv[4] = Rinv.at(1, 1); rinv[5] = Rinv.at(1, 2); - rinv[6] = Rinv.at(2, 0); rinv[7] = Rinv.at(2, 1); rinv[8] = Rinv.at(2, 2); -} - - -void PlaneWarper::detectResultRoi(Point &dst_tl, Point &dst_br) -{ - float tl_uf = numeric_limits::max(); - float tl_vf = numeric_limits::max(); - float br_uf = -numeric_limits::max(); - float br_vf = -numeric_limits::max(); - - float u, v; - - projector_.mapForward(0, 0, u, v); - tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); - br_uf = max(br_uf, u); br_vf = max(br_vf, v); - - projector_.mapForward(0, static_cast(src_size_.height - 1), u, v); - tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); - br_uf = max(br_uf, u); br_vf = max(br_vf, v); - - projector_.mapForward(static_cast(src_size_.width - 1), 0, u, v); - tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); - br_uf = max(br_uf, u); br_vf = max(br_vf, v); - - projector_.mapForward(static_cast(src_size_.width - 1), static_cast(src_size_.height - 1), u, v); - tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); - br_uf = max(br_uf, u); br_vf = max(br_vf, v); - - dst_tl.x = static_cast(tl_uf); - dst_tl.y = static_cast(tl_vf); - dst_br.x = static_cast(br_uf); - dst_br.y = static_cast(br_vf); -} - - -void SphericalWarper::detectResultRoi(Point &dst_tl, Point &dst_br) -{ - detectResultRoiByBorder(dst_tl, dst_br); - - float tl_uf = static_cast(dst_tl.x); - float tl_vf = static_cast(dst_tl.y); - float br_uf = static_cast(dst_br.x); - float br_vf = static_cast(dst_br.y); - - float x = projector_.rinv[1]; - float y = projector_.rinv[4]; - float z = projector_.rinv[7]; - if (y > 0.f) - { - x = projector_.focal * x / z + src_size_.width * 0.5f; - y = projector_.focal * y / z + src_size_.height * 0.5f; - if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height) - { - tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast(CV_PI * projector_.scale)); - br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast(CV_PI * projector_.scale)); - } - } - - x = projector_.rinv[1]; - y = -projector_.rinv[4]; - z = projector_.rinv[7]; - if (y > 0.f) - { - x = projector_.focal * x / z + src_size_.width * 0.5f; - y = projector_.focal * y / z + src_size_.height * 0.5f; - if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height) - { - tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast(0)); - br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast(0)); - } - } - - dst_tl.x = static_cast(tl_uf); - dst_tl.y = static_cast(tl_vf); - dst_br.x = static_cast(br_uf); - dst_br.y = static_cast(br_vf); -} +//M*/ +#include "warpers.hpp" + +using namespace std; +using namespace cv; + +Ptr Warper::createByCameraFocal(float focal, int type, bool try_gpu) +{ + bool can_use_gpu = try_gpu && gpu::getCudaEnabledDeviceCount(); + if (type == PLANE) + return new PlaneWarper(focal); + if (type == CYLINDRICAL) + return new CylindricalWarper(focal); + if (type == SPHERICAL) + return !can_use_gpu ? new SphericalWarper(focal) : new SphericalWarperGpu(focal); + CV_Error(CV_StsBadArg, "unsupported warping type"); + return NULL; +} + + +void ProjectorBase::setTransformation(const Mat &R) +{ + CV_Assert(R.size() == Size(3, 3)); + CV_Assert(R.type() == CV_32F); + r[0] = R.at(0, 0); r[1] = R.at(0, 1); r[2] = R.at(0, 2); + r[3] = R.at(1, 0); r[4] = R.at(1, 1); r[5] = R.at(1, 2); + r[6] = R.at(2, 0); r[7] = R.at(2, 1); r[8] = R.at(2, 2); + + Mat Rinv = R.inv(); + rinv[0] = Rinv.at(0, 0); rinv[1] = Rinv.at(0, 1); rinv[2] = Rinv.at(0, 2); + rinv[3] = Rinv.at(1, 0); rinv[4] = Rinv.at(1, 1); rinv[5] = Rinv.at(1, 2); + rinv[6] = Rinv.at(2, 0); rinv[7] = Rinv.at(2, 1); rinv[8] = Rinv.at(2, 2); +} + + +void PlaneWarper::detectResultRoi(Point &dst_tl, Point &dst_br) +{ + float tl_uf = numeric_limits::max(); + float tl_vf = numeric_limits::max(); + float br_uf = -numeric_limits::max(); + float br_vf = -numeric_limits::max(); + + float u, v; + + projector_.mapForward(0, 0, u, v); + tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); + br_uf = max(br_uf, u); br_vf = max(br_vf, v); + + projector_.mapForward(0, static_cast(src_size_.height - 1), u, v); + tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); + br_uf = max(br_uf, u); br_vf = max(br_vf, v); + + projector_.mapForward(static_cast(src_size_.width - 1), 0, u, v); + tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); + br_uf = max(br_uf, u); br_vf = max(br_vf, v); + + projector_.mapForward(static_cast(src_size_.width - 1), static_cast(src_size_.height - 1), u, v); + tl_uf = min(tl_uf, u); tl_vf = min(tl_vf, v); + br_uf = max(br_uf, u); br_vf = max(br_vf, v); + + dst_tl.x = static_cast(tl_uf); + dst_tl.y = static_cast(tl_vf); + dst_br.x = static_cast(br_uf); + dst_br.y = static_cast(br_vf); +} + + +void SphericalWarper::detectResultRoi(Point &dst_tl, Point &dst_br) +{ + detectResultRoiByBorder(dst_tl, dst_br); + + float tl_uf = static_cast(dst_tl.x); + float tl_vf = static_cast(dst_tl.y); + float br_uf = static_cast(dst_br.x); + float br_vf = static_cast(dst_br.y); + + float x = projector_.rinv[1]; + float y = projector_.rinv[4]; + float z = projector_.rinv[7]; + if (y > 0.f) + { + x = projector_.focal * x / z + src_size_.width * 0.5f; + y = projector_.focal * y / z + src_size_.height * 0.5f; + if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height) + { + tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast(CV_PI * projector_.scale)); + br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast(CV_PI * projector_.scale)); + } + } + + x = projector_.rinv[1]; + y = -projector_.rinv[4]; + z = projector_.rinv[7]; + if (y > 0.f) + { + x = projector_.focal * x / z + src_size_.width * 0.5f; + y = projector_.focal * y / z + src_size_.height * 0.5f; + if (x > 0.f && x < src_size_.width && y > 0.f && y < src_size_.height) + { + tl_uf = min(tl_uf, 0.f); tl_vf = min(tl_vf, static_cast(0)); + br_uf = max(br_uf, 0.f); br_vf = max(br_vf, static_cast(0)); + } + } + + dst_tl.x = static_cast(tl_uf); + dst_tl.y = static_cast(tl_vf); + dst_br.x = static_cast(br_uf); + dst_br.y = static_cast(br_vf); +} + + +Point SphericalWarperGpu::warp(const Mat &src, float focal, const Mat &R, Mat &dst, + int interp_mode, int border_mode) +{ + src_size_ = src.size(); + projector_.size = src.size(); + projector_.focal = focal; + projector_.setTransformation(R); + + cv::Point dst_tl, dst_br; + detectResultRoi(dst_tl, dst_br); + + gpu::buildWarpSphericalMaps(src.size(), Rect(dst_tl, Point(dst_br.x+1, dst_br.y+1)), + R, focal, projector_.scale, d_xmap_, d_ymap_); + + dst.create(dst_br.y - dst_tl.y + 1, dst_br.x - dst_tl.x + 1, src.type()); + remap(src, dst, Mat(d_xmap_), Mat(d_ymap_), interp_mode, border_mode); + + return dst_tl; +} diff --git a/modules/stitching/warpers.hpp b/modules/stitching/warpers.hpp index 597ee4fa2..854c87ba6 100644 --- a/modules/stitching/warpers.hpp +++ b/modules/stitching/warpers.hpp @@ -1,4 +1,4 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// + /*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -48,7 +48,7 @@ class Warper { public: enum { PLANE, CYLINDRICAL, SPHERICAL }; - static cv::Ptr createByCameraFocal(float focal, int type); + static cv::Ptr createByCameraFocal(float focal, int type, bool try_gpu = false); virtual ~Warper() {} virtual cv::Point warp(const cv::Mat &src, float focal, const cv::Mat& R, cv::Mat &dst, @@ -73,10 +73,10 @@ template class WarperBase : public Warper { public: - cv::Point warp(const cv::Mat &src, float focal, const cv::Mat &R, cv::Mat &dst, - int interp_mode, int border_mode); + virtual cv::Point warp(const cv::Mat &src, float focal, const cv::Mat &R, cv::Mat &dst, + int interp_mode, int border_mode); - cv::Rect warpRoi(const cv::Size &sz, float focal, const cv::Mat &R); + virtual cv::Rect warpRoi(const cv::Size &sz, float focal, const cv::Mat &R); protected: // Detects ROI of the destination image. It's correct for any projection. @@ -95,7 +95,6 @@ struct PlaneProjector : ProjectorBase { void mapForward(float x, float y, float &u, float &v); void mapBackward(float u, float v, float &x, float &y); - float plane_dist; }; @@ -129,11 +128,23 @@ class SphericalWarper : public WarperBase public: SphericalWarper(float scale = 300.f) { projector_.scale = scale; } -private: +protected: void detectResultRoi(cv::Point &dst_tl, cv::Point &dst_br); }; +class SphericalWarperGpu : public SphericalWarper +{ +public: + SphericalWarperGpu(float scale = 300.f) : SphericalWarper(scale) {} + cv::Point warp(const cv::Mat &src, float focal, const cv::Mat &R, cv::Mat &dst, + int interp_mode, int border_mode); + +private: + cv::gpu::GpuMat d_xmap_, d_ymap_, d_dst_; +}; + + struct CylindricalProjector : ProjectorBase { void mapForward(float x, float y, float &u, float &v); diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index c7ef45bad..48def0cff 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -824,3 +824,45 @@ TEST(solvePnPRansac) GPU_OFF; } } + + +TEST(GaussianBlur) +{ + for (int size = 1000; size < 10000; size += 3000) + { + SUBTEST << "16SC3, size " << size; + + Mat src; gen(src, size, size, CV_16SC3, 0, 256); + Mat dst(src.size(), src.type()); + + CPU_ON; + GaussianBlur(src, dst, Size(5,5), 0); + CPU_OFF; + + gpu::GpuMat d_src(src); + gpu::GpuMat d_dst(src.size(), src.type()); + + GPU_ON; + gpu::GaussianBlur(d_src, d_dst, Size(5,5), 0); + GPU_OFF; + } + + for (int size = 1000; size < 10000; size += 3000) + { + SUBTEST << "8UC4, size " << size; + + Mat src; gen(src, size, size, CV_8UC4, 0, 256); + Mat dst(src.size(), src.type()); + + CPU_ON; + GaussianBlur(src, dst, Size(5,5), 0); + CPU_OFF; + + gpu::GpuMat d_src(src); + gpu::GpuMat d_dst(src.size(), src.type()); + + GPU_ON; + gpu::GaussianBlur(d_src, d_dst, Size(5,5), 0); + GPU_OFF; + } +}