diff --git a/modules/gpu/src/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/cuda_shared.hpp index aa9497bc9..217416648 100644 --- a/modules/gpu/src/cuda/cuda_shared.hpp +++ b/modules/gpu/src/cuda/cuda_shared.hpp @@ -61,6 +61,8 @@ namespace cv { static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; } + extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels); + extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels); extern "C" void set_to_with_mask (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels); diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu index 56ae28376..ad2e0f331 100644 --- a/modules/gpu/src/cuda/matrix_operations.cu +++ b/modules/gpu/src/cuda/matrix_operations.cu @@ -42,7 +42,7 @@ #include #include -#include +//#include #include "cuda_shared.hpp" #include "cuda_runtime.h" @@ -53,6 +53,24 @@ __constant__ __align__(16) double scalar_d[4]; namespace mat_operators { + ////////////////////////////////////////////////////////// + // CopyTo + ////////////////////////////////////////////////////////// + + template + __global__ void kernel_copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels) + { + size_t x = blockIdx.x * blockDim.x + threadIdx.x; + size_t y = blockIdx.y * blockDim.y + threadIdx.y; + + if ((x < cols * channels ) && (y < rows)) + if (mask[y * step_mask + x / channels] != 0) + { + size_t idx = y * (step_mat / sizeof(T)) + x; + mat_dst[idx] = mat_src[idx]; + } + } + ////////////////////////////////////////////////////////// // SetTo ////////////////////////////////////////////////////////// @@ -326,6 +344,44 @@ namespace cv namespace impl { + ////////////////////////////////////////////////////////////// + // CopyTo + ////////////////////////////////////////////////////////////// + + typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels); + + template + void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels) + { + dim3 threadsPerBlock(16,16, 1); + dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1); + ::mat_operators::kernel_copy_to_with_mask<<>> + ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels); + cudaSafeCall ( cudaThreadSynchronize() ); + } + + extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels) + { + static CopyToFunc tab[8] = + { + copy_to_with_mask_run, + copy_to_with_mask_run, + copy_to_with_mask_run, + copy_to_with_mask_run, + copy_to_with_mask_run, + copy_to_with_mask_run, + copy_to_with_mask_run, + 0 + }; + + CopyToFunc func = tab[depth]; + + if (func == 0) error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__); + + func(mat_src, mat_dst, mask, channels); + } + + ////////////////////////////////////////////////////////////// // SetTo ////////////////////////////////////////////////////////////// @@ -412,11 +468,9 @@ namespace cv // ConvertTo ////////////////////////////////////////////////////////////// + typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta); - - typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta); - - //#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 130) + //#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 130) template void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta) diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index a29859b89..9469f5918 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -99,9 +99,17 @@ void cv::gpu::GpuMat::copyTo( GpuMat& m ) const cudaSafeCall( cudaThreadSynchronize() ); } -void cv::gpu::GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const +void cv::gpu::GpuMat::copyTo( GpuMat& mat, const GpuMat& mask ) const { - CV_Assert(!"Not implemented"); + if (mask.empty()) + { + this->copyTo(mat); + } + else + { + mat.create(this->size(), this->type()); + cv::gpu::impl::copy_to_with_mask(*this, mat, this->depth() , mask, this->channels()); + } } void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const diff --git a/tests/gpu/src/operator_copy_to.cpp b/tests/gpu/src/operator_copy_to.cpp new file mode 100644 index 000000000..c6732e99a --- /dev/null +++ b/tests/gpu/src/operator_copy_to.cpp @@ -0,0 +1,110 @@ +#include "gputest.hpp" +#include "highgui.h" +#include "cv.h" +#include +#include +#include +#include +#include +#include +#include // for cout << setw() + +using namespace cv; +using namespace std; +using namespace gpu; + +class CV_GpuMatOpCopyTo : public CvTest +{ + public: + CV_GpuMatOpCopyTo(); + ~CV_GpuMatOpCopyTo(); + protected: + + template + void print_mat(const T & mat, const std::string & name) const; + + void run(int); + + bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat); + + private: + int rows; + int cols; +}; + +CV_GpuMatOpCopyTo::CV_GpuMatOpCopyTo(): CvTest( "GpuMatOperatorCopyTo", "copyTo" ) +{ + rows = 234; + cols = 123; + + //#define PRINT_MATRIX +} + +CV_GpuMatOpCopyTo::~CV_GpuMatOpCopyTo() {} + +template +void CV_GpuMatOpCopyTo::print_mat(const T & mat, const std::string & name) const +{ + cv::imshow(name, mat); +} + +bool CV_GpuMatOpCopyTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat) +{ + Mat cmat(cpumat.size(), cpumat.type(), Scalar::all(0)); + GpuMat gmat(cmat); + + Mat cpumask(cpumat.size(), CV_8U); + randu(cpumask, Scalar::all(0), Scalar::all(127)); + threshold(cpumask, cpumask, 0, 127, THRESH_BINARY); + GpuMat gpumask(cpumask); + + //int64 time = getTickCount(); + cpumat.copyTo(cmat, cpumask); + //int64 time1 = getTickCount(); + gpumat.copyTo(gmat, gpumask); + //int64 time2 = getTickCount(); + + //std::cout << "\ntime cpu: " << std::fixed << std::setprecision(12) << 1.0 / double((time1 - time) / (double)getTickFrequency()); + //std::cout << "\ntime gpu: " << std::fixed << std::setprecision(12) << 1.0 / double((time2 - time1) / (double)getTickFrequency()); + //std::cout << "\n"; + +#ifdef PRINT_MATRIX + print_mat(cmat, "cpu mat"); + print_mat(gmat, "gpu mat"); + print_mat(cpumask, "cpu mask"); + print_mat(gpumask, "gpu mask"); + cv::waitKey(0); +#endif + + double ret = norm(cmat, gmat); + + if (ret < 1.0) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} + +void CV_GpuMatOpCopyTo::run( int /* start_from */) +{ + bool is_test_good = true; + + for (int i = 0 ; i < 7; i++) + { + Mat cpumat(rows, cols, i); + cpumat.setTo(Scalar::all(127)); + + GpuMat gpumat(cpumat); + + is_test_good &= compare_matrix(cpumat, gpumat); + } + + if (is_test_good == true) + ts->set_failed_test_info(CvTS::OK); + else + ts->set_failed_test_info(CvTS::FAIL_GENERIC); +} + +CV_GpuMatOpCopyTo CV_GpuMatOpCopyTo_test;