From f37ac8e4caf2f646c4662f9a3a6d87b891a722fe Mon Sep 17 00:00:00 2001 From: Andrey Morozov Date: Wed, 21 Jul 2010 10:43:01 +0000 Subject: [PATCH] fixed implementation of gpumat::setTo() and improved gputest --- modules/gpu/src/cuda/matrix_operations.cu | 104 +++++--- tests/gpu/CMakeLists.txt | 6 +- tests/gpu/src/operator_set_to.cpp | 303 ++++++++++++++++++++-- 3 files changed, 343 insertions(+), 70 deletions(-) diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/gpu/src/cuda/matrix_operations.cu index 9776b53a1..ac7b08fc0 100644 --- a/modules/gpu/src/cuda/matrix_operations.cu +++ b/modules/gpu/src/cuda/matrix_operations.cu @@ -41,54 +41,66 @@ //M*/ #include +#include #include #include "cuda_shared.hpp" #include "cuda_runtime.h" -__constant__ float scalar_d[4]; +__constant__ __align__(16) float scalar_d[4]; namespace mat_operators { - template struct unroll { __device__ static void unroll_set(T * mat, size_t i) { - mat[i] = static_cast(scalar_d[i % channels]); + mat[i] = static_cast(scalar_d[channels - count]); unroll::unroll_set(mat, i+1); } - __device__ static void unroll_set_with_mask(T * mat, float mask, size_t i) + __device__ static void unroll_set_with_mask(T * mat, unsigned char mask, size_t i) { - mat[i] = mask * static_cast(scalar_d[i % channels]); + if ( mask != 0 ) + mat[i] = static_cast(scalar_d[channels - count]); + unroll::unroll_set_with_mask(mat, mask, i+1); } }; template - struct unroll + struct unroll { __device__ static void unroll_set(T * , size_t){} - __device__ static void unroll_set_with_mask(T * , float, size_t){} + __device__ static void unroll_set_with_mask(T * , unsigned char, size_t){} }; template - __global__ void kernel_set_to_without_mask(T * mat) + __device__ size_t GetIndex(size_t i, int cols, int rows, int step) { - size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T); - unroll::unroll_set(mat, i); + return ((i / static_cast(cols))*static_cast(step) / static_cast(sizeof(T))) + + (i % static_cast(rows))*static_cast(channels) ; } template - __global__ void kernel_set_to_with_mask(T * mat, const float * mask) + __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step) { - size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T); - unroll::unroll_set_with_mask(mat, i, mask[i]); + size_t i = (blockIdx.x * blockDim.x + threadIdx.x); + if (i < cols * rows) + { + unroll::unroll_set(mat, GetIndex(i, cols, rows, step)); + } + } + + template + __global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step) + { + size_t i = (blockIdx.x * blockDim.x + threadIdx.x); + if (i < cols * rows) + unroll::unroll_set_with_mask(mat, mask[i], GetIndex(i, cols, rows, step)); } } - extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int elemSize1, int channels) { // download scalar to constant memory @@ -97,29 +109,36 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl data[1] = scalar[1]; data[2] = scalar[2]; data[3] = scalar[3]; - cudaMemcpyToSymbol(scalar_d, data, sizeof(data)); + cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); - dim3 numBlocks(mat.rows * mat.step / 256, 1, 1); - dim3 threadsPerBlock(256); + dim3 threadsPerBlock(256,1,1); + dim3 numBlocks (mat.rows * mat.cols / threadsPerBlock.x + 1, 1, 1); if (channels == 1) { - if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (float *)mask.ptr); - if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (float *)mask.ptr); - if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned int *)mat.ptr, (float *)mask.ptr); + if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); } if (channels == 2) { - if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (float *)mask.ptr); - if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (float *)mask.ptr); - if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned int *)mat.ptr, (float *)mask.ptr); + if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); } if (channels == 3) { - if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (float *)mask.ptr); - if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (float *)mask.ptr); - if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned int *)mat.ptr, (float *)mask.ptr); + if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); } + if (channels == 4) + { + if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<<>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<<>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<<>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step); + } + cudaSafeCall( cudaThreadSynchronize() ); } extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels) @@ -129,28 +148,35 @@ extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const do data[1] = scalar[1]; data[2] = scalar[2]; data[3] = scalar[3]; - cudaMemcpyToSymbol(scalar_d, data, sizeof(data)); + cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); - int numBlocks = mat.rows * mat.step / 256; - - dim3 threadsPerBlock(256); + dim3 threadsPerBlock(256, 1, 1); + dim3 numBlocks (mat.rows * mat.cols / threadsPerBlock.x + 1, 1, 1); if (channels == 1) { - if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr); - if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr); - if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned int *)mat.ptr); + if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask< float, 1><<>>(( float *)mat.ptr, mat.cols, mat.rows, mat.step); } if (channels == 2) { - if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr); - if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr); - if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned int *)mat.ptr); + if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask< float , 2><<>>((float *)mat.ptr, mat.cols, mat.rows, mat.step); } if (channels == 3) { - if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr); - if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr); - if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned int *)mat.ptr); + if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask< float, 3><<>>(( float *)mat.ptr, mat.cols, mat.rows, mat.step); } + if (channels == 4) + { + if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<<>>(mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<<>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step); + if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<<>>((float *)mat.ptr, mat.cols, mat.rows, mat.step); + } + + cudaSafeCall( cudaThreadSynchronize() ); } diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 15a8ecd8e..1069df5db 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -10,7 +10,7 @@ source_group("Include" FILES ${test_hdrs}) set(the_target "opencv_test_gpu") -include_directories ( +include_directories ( "${CMAKE_SOURCE_DIR}/include/opencv" "${CMAKE_SOURCE_DIR}/modules/core/include" "${CMAKE_SOURCE_DIR}/modules/imgproc/include" @@ -36,10 +36,10 @@ set_target_properties(${the_target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/" ) -add_dependencies(${the_target} opencv_ts opencv_gpu) +add_dependencies(${the_target} opencv_ts opencv_gpu opencv_highgui) # Add the required libraries for linking: -target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} opencv_ts opencv_gpu) +target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} opencv_ts opencv_gpu opencv_highgui) enable_testing() get_target_property(LOC ${the_target} LOCATION) diff --git a/tests/gpu/src/operator_set_to.cpp b/tests/gpu/src/operator_set_to.cpp index 7058f0e85..73d7cc004 100644 --- a/tests/gpu/src/operator_set_to.cpp +++ b/tests/gpu/src/operator_set_to.cpp @@ -1,4 +1,5 @@ #include "gputest.hpp" +#include "highgui.h" #include #include #include @@ -16,52 +17,298 @@ class CV_GpuMatOpSetTo : public CvTest CV_GpuMatOpSetTo(); ~CV_GpuMatOpSetTo(); protected: - void print_mat(cv::Mat & mat); + void print_mat(cv::Mat & mat, std::string name = "cpu mat"); + void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat"); void run(int); + + bool test_cv_8u_c1(); + bool test_cv_8u_c2(); + bool test_cv_8u_c3(); + bool test_cv_8u_c4(); + + bool test_cv_16u_c4(); + + bool test_cv_32f_c1(); + bool test_cv_32f_c2(); + bool test_cv_32f_c3(); + bool test_cv_32f_c4(); + + private: + int w; + int h; + Scalar s; }; -CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" ) {} -CV_GpuMatOpSetTo::~CV_GpuMatOpSetTo() {} - -void CV_GpuMatOpSetTo::print_mat(cv::Mat & mat) +CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" ) { - for (size_t j = 0; j < mat.rows; j++) - { - for (size_t i = 0; i < mat.cols; i++) - { - std::cout << " " << int(mat.ptr(j)[i]); - } - std::cout << std::endl; - } - std::cout << std::endl; + w = 100; + h = 100; + + s.val[0] = 128.0; + s.val[1] = 128.0; + s.val[2] = 128.0; + s.val[3] = 128.0; + + //#define PRINT_MATRIX } -void CV_GpuMatOpSetTo::run( int /* start_from */) -{ - Mat cpumat(1024, 1024, CV_8U, Scalar::all(0)); - GpuMat gpumat(cpumat); +CV_GpuMatOpSetTo::~CV_GpuMatOpSetTo() {} - Scalar s(3); +void CV_GpuMatOpSetTo::print_mat(cv::Mat & mat, std::string name ) +{ + cv::imshow(name, mat); +} + +void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name) +{ + cv::Mat newmat; + mat.download(newmat); + print_mat(newmat, name); +} + +bool CV_GpuMatOpSetTo::test_cv_8u_c1() +{ + Mat cpumat(w, h, CV_8U, Scalar::all(0)); + GpuMat gpumat(cpumat); cpumat.setTo(s); gpumat.setTo(s); +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + double ret = norm(cpumat, gpumat); - /* - std::cout << "norm() = " << ret << "\n"; + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} - std::cout << "cpumat: \n"; +bool CV_GpuMatOpSetTo::test_cv_8u_c2() +{ + Mat cpumat(w, h, CV_8UC2, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif - Mat newmat; - gpumat.download(newmat); + double ret = norm(cpumat, gpumat); - std::cout << "gpumat: \n"; - print_mat(newmat); - */ + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} - if (ret < 1.0) +bool CV_GpuMatOpSetTo::test_cv_8u_c3() +{ + Mat cpumat(w, h, CV_8UC3, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} + +bool CV_GpuMatOpSetTo::test_cv_8u_c4() +{ + Mat cpumat(w, h, CV_8UC4, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} + +bool CV_GpuMatOpSetTo::test_cv_16u_c4() +{ + Mat cpumat(w, h, CV_16UC4, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} + + +bool CV_GpuMatOpSetTo::test_cv_32f_c1() +{ + Mat cpumat(w, h, CV_32F, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} + +bool CV_GpuMatOpSetTo::test_cv_32f_c2() +{ + Mat cpumat(w, h, CV_32FC2, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret; + return false; + } +} + +bool CV_GpuMatOpSetTo::test_cv_32f_c3() +{ + Mat cpumat(w, h, CV_32FC3, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret; + return false; + } +} + +bool CV_GpuMatOpSetTo::test_cv_32f_c4() +{ + Mat cpumat(w, h, CV_32FC4, Scalar::all(0)); + GpuMat gpumat(cpumat); + + cpumat.setTo(s); + gpumat.setTo(s); + +#ifdef PRINT_MATRIX + print_mat(cpumat); + print_mat(gpumat); + cv::waitKey(0); +#endif + + double ret = norm(cpumat, gpumat); + + if (ret < 0.1) + return true; + else + { + std::cout << "return : " << ret << "\n"; + return false; + } +} + +void CV_GpuMatOpSetTo::run( int /* start_from */) +{ + bool is_test_good = true; + + is_test_good &= test_cv_8u_c1(); + is_test_good &= test_cv_8u_c2(); + is_test_good &= test_cv_8u_c3(); + is_test_good &= test_cv_8u_c4(); + + is_test_good &= test_cv_16u_c4(); + + is_test_good &= test_cv_32f_c1(); + is_test_good &= test_cv_32f_c2(); + is_test_good &= test_cv_32f_c3(); + is_test_good &= test_cv_32f_c4(); + + if (is_test_good == true) ts->set_failed_test_info(CvTS::OK); else ts->set_failed_test_info(CvTS::FAIL_GENERIC);