modified kernel setto(), added double type, code has been improved
This commit is contained in:
parent
3f5dd5f1cc
commit
98c8ecf829
@ -61,8 +61,8 @@ namespace cv
|
||||
{
|
||||
// Divides a by b and adds one to the quotient whenever the division leaves a
// remainder. For non-negative a and positive b this is the ceiling of a / b,
// i.e. the number of b-sized chunks needed to cover a items. b must be non-zero.
static inline int divUp(int a, int b)
{
    const int quotient = a / b;

    if (a % b != 0)
        return quotient + 1;

    return quotient;
}
|
||||
|
||||
extern "C" void set_to_without_mask (const DevMem2D& mat, const double * scalar, int depth, int channels);
|
||||
extern "C" void set_to_with_mask (const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels);
|
||||
extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels);
|
||||
extern "C" void set_to_with_mask (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels);
|
||||
|
||||
extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta);
|
||||
}
|
||||
|
@ -49,7 +49,7 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::impl;
|
||||
|
||||
__constant__ __align__(16) float scalar_d[4];
|
||||
__constant__ __align__(16) double scalar_d[4];
|
||||
|
||||
namespace mat_operators
|
||||
{
|
||||
@ -57,8 +57,8 @@ namespace mat_operators
|
||||
// SetTo
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
template<typename T, int channels>
|
||||
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
|
||||
template<typename T>
|
||||
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
|
||||
{
|
||||
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
@ -70,14 +70,14 @@ namespace mat_operators
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, int channels>
|
||||
__global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int step_mask)
|
||||
template<typename T>
|
||||
__global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
|
||||
{
|
||||
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (mask[y * step_mask + x] != 0)
|
||||
if ((x < cols * channels ) && (y < rows))
|
||||
if (mask[y * step_mask + x / channels] != 0)
|
||||
{
|
||||
size_t idx = y * (step / sizeof(T)) + x;
|
||||
mat[idx] = scalar_d[ x % channels ];
|
||||
@ -319,94 +319,6 @@ namespace mat_operators
|
||||
|
||||
} // namespace mat_operators
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// SetTo
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
|
||||
{
|
||||
float data[4];
|
||||
data[0] = static_cast<float>(scalar[0]);
|
||||
data[1] = static_cast<float>(scalar[1]);
|
||||
data[2] = static_cast<float>(scalar[2]);
|
||||
data[3] = static_cast<float>(scalar[3]);
|
||||
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
|
||||
|
||||
dim3 threadsPerBlock(16, 16, 1);
|
||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||
|
||||
if (channels == 1)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 1><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
}
|
||||
if (channels == 2)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 2><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
}
|
||||
if (channels == 3)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 3><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
}
|
||||
if (channels == 4)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 4><<<numBlocks,threadsPerBlock>>>(mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 4><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_without_mask<float, 4><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, mat.cols, mat.rows, mat.step);
|
||||
}
|
||||
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int elemSize1, int channels)
|
||||
{
|
||||
float data[4];
|
||||
data[0] = static_cast<float>(scalar[0]);
|
||||
data[1] = static_cast<float>(scalar[1]);
|
||||
data[2] = static_cast<float>(scalar[2]);
|
||||
data[3] = static_cast<float>(scalar[3]);
|
||||
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
|
||||
|
||||
dim3 threadsPerBlock(16, 16, 1);
|
||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||
|
||||
if (channels == 1)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 1><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
}
|
||||
if (channels == 2)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 2><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
}
|
||||
if (channels == 3)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 3><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
}
|
||||
if (channels == 4)
|
||||
{
|
||||
if (elemSize1 == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 4><<<numBlocks,threadsPerBlock>>>(mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 4><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
if (elemSize1 == 4) ::mat_operators::kernel_set_to_with_mask<float, 4><<<numBlocks,threadsPerBlock>>>((float *)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, mask.step);
|
||||
}
|
||||
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// ConvertTo
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace gpu
|
||||
@ -414,6 +326,94 @@ namespace cv
|
||||
namespace impl
|
||||
{
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// SetTo
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
// Pointer to a type-specialised masked setTo launcher; set_to_with_mask() keeps
// a table of these and selects one by depth.
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels);
|
||||
// Pointer to a type-specialised unmasked setTo launcher; set_to_without_mask()
// keeps a table of these and selects one by depth.
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels);
|
||||
|
||||
template <typename T>
|
||||
void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels)
|
||||
{
|
||||
dim3 threadsPerBlock(32, 8, 1);
|
||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||
::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void set_to_without_mask_run(const DevMem2D& mat, int channels)
|
||||
{
|
||||
dim3 threadsPerBlock(32, 8, 1);
|
||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||
::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels)
|
||||
{
|
||||
double data[4];
|
||||
data[0] = scalar[0];
|
||||
data[1] = scalar[1];
|
||||
data[2] = scalar[2];
|
||||
data[3] = scalar[3];
|
||||
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
|
||||
|
||||
static SetToFunc_without_mask tab[8] =
|
||||
{
|
||||
set_to_without_mask_run<unsigned char>,
|
||||
set_to_without_mask_run<char>,
|
||||
set_to_without_mask_run<unsigned short>,
|
||||
set_to_without_mask_run<short>,
|
||||
set_to_without_mask_run<int>,
|
||||
set_to_without_mask_run<float>,
|
||||
set_to_without_mask_run<double>,
|
||||
0
|
||||
};
|
||||
|
||||
SetToFunc_without_mask func = tab[depth];
|
||||
|
||||
if (func == 0) error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__);
|
||||
|
||||
func(mat, channels);
|
||||
}
|
||||
|
||||
|
||||
extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels)
|
||||
{
|
||||
double data[4];
|
||||
data[0] = scalar[0];
|
||||
data[1] = scalar[1];
|
||||
data[2] = scalar[2];
|
||||
data[3] = scalar[3];
|
||||
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
|
||||
|
||||
static SetToFunc_with_mask tab[8] =
|
||||
{
|
||||
set_to_with_mask_run<unsigned char>,
|
||||
set_to_with_mask_run<char>,
|
||||
set_to_with_mask_run<unsigned short>,
|
||||
set_to_with_mask_run<short>,
|
||||
set_to_with_mask_run<int>,
|
||||
set_to_with_mask_run<float>,
|
||||
set_to_with_mask_run<double>,
|
||||
0
|
||||
};
|
||||
|
||||
SetToFunc_with_mask func = tab[depth];
|
||||
|
||||
if (func == 0) error("Operation \'ConvertTo\' doesn't supported on your GPU model", __FILE__, __LINE__);
|
||||
|
||||
func(mat, mask, channels);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// ConvertTo
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta);
|
||||
|
||||
//#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 130)
|
||||
|
@ -133,7 +133,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
|
||||
|
||||
GpuMat& GpuMat::operator = (const Scalar& s)
|
||||
{
|
||||
cv::gpu::impl::set_to_without_mask(*this, s.val, this->elemSize1(), this->channels());
|
||||
cv::gpu::impl::set_to_without_mask( *this, this->depth(), s.val, this->channels());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -145,11 +145,11 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
cv::gpu::impl::set_to_without_mask(*this, s.val, this->elemSize1(), this->channels());
|
||||
cv::gpu::impl::set_to_without_mask( *this, this->depth(), s.val, this->channels());
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->elemSize1(), this->channels());
|
||||
cv::gpu::impl::set_to_with_mask( *this, this->depth(), s.val, mask, this->channels());
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
@ -24,19 +24,6 @@ class CV_GpuMatOpSetTo : public CvTest
|
||||
|
||||
bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
|
||||
|
||||
bool test_cv_8u_c1();
|
||||
bool test_cv_8u_c2();
|
||||
bool test_cv_8u_c3();
|
||||
bool test_cv_8u_c4();
|
||||
|
||||
bool test_cv_16u_c4();
|
||||
|
||||
bool test_cv_32f_c1();
|
||||
bool test_cv_32f_c2();
|
||||
bool test_cv_32f_c3();
|
||||
bool test_cv_32f_c4();
|
||||
|
||||
|
||||
private:
|
||||
int rows;
|
||||
int cols;
|
||||
@ -45,13 +32,13 @@ class CV_GpuMatOpSetTo : public CvTest
|
||||
|
||||
CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" )
|
||||
{
|
||||
rows = 129;
|
||||
cols = 127;
|
||||
rows = 256;
|
||||
cols = 124;
|
||||
|
||||
s.val[0] = 128.0;
|
||||
s.val[1] = 128.0;
|
||||
s.val[2] = 128.0;
|
||||
s.val[3] = 128.0;
|
||||
s.val[0] = 127.0;
|
||||
s.val[1] = 127.0;
|
||||
s.val[2] = 127.0;
|
||||
s.val[3] = 127.0;
|
||||
|
||||
//#define PRINT_MATRIX
|
||||
}
|
||||
@ -99,95 +86,16 @@ bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_8u_c1()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_8U, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_8u_c2()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_8UC2, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_8u_c3()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_8UC3, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_8u_c4()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_8UC4, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_16u_c4()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_16UC4, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_32f_c1()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_32F, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_32f_c2()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_32FC2, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_32f_c3()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_32FC3, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
bool CV_GpuMatOpSetTo::test_cv_32f_c4()
|
||||
{
|
||||
Mat cpumat(rows, cols, CV_32FC4, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
|
||||
return compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
void CV_GpuMatOpSetTo::run( int /* start_from */)
|
||||
{
|
||||
bool is_test_good = true;
|
||||
|
||||
is_test_good &= test_cv_8u_c1();
|
||||
is_test_good &= test_cv_8u_c2();
|
||||
is_test_good &= test_cv_8u_c3();
|
||||
is_test_good &= test_cv_8u_c4();
|
||||
|
||||
is_test_good &= test_cv_16u_c4();
|
||||
|
||||
is_test_good &= test_cv_32f_c1();
|
||||
is_test_good &= test_cv_32f_c2();
|
||||
is_test_good &= test_cv_32f_c3();
|
||||
is_test_good &= test_cv_32f_c4();
|
||||
for (int i = 0; i < 7; i++)
|
||||
{
|
||||
Mat cpumat(rows, cols, i, Scalar::all(0));
|
||||
GpuMat gpumat(cpumat);
|
||||
is_test_good &= compare_matrix(cpumat, gpumat);
|
||||
}
|
||||
|
||||
if (is_test_good == true)
|
||||
ts->set_failed_test_info(CvTS::OK);
|
||||
|
Loading…
x
Reference in New Issue
Block a user