Fixed implementation of GpuMat::setTo()

This commit is contained in:
Andrey Morozov 2010-07-21 13:35:41 +00:00
parent f37ac8e4ca
commit 2fe75ea33e
2 changed files with 54 additions and 174 deletions

View File

@ -76,19 +76,21 @@ namespace mat_operators
}; };
template <typename T, int channels> template <typename T, int channels>
__device__ size_t GetIndex(size_t i, int cols, int rows, int step) __device__ size_t GetIndex(size_t i, int cols, int step)
{ {
return ((i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T))) + size_t ret = (i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T)) +
(i % static_cast<size_t>(rows))*static_cast<size_t>(channels) ; (i % static_cast<size_t>(cols))*static_cast<size_t>(channels);
return ret;
} }
template <typename T, int channels> template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step) __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
{ {
size_t i = (blockIdx.x * blockDim.x + threadIdx.x); size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
if (i < cols * rows) if (i < cols * rows)
{ {
unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, rows, step)); unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, step));
} }
} }
@ -97,7 +99,7 @@ namespace mat_operators
{ {
size_t i = (blockIdx.x * blockDim.x + threadIdx.x); size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
if (i < cols * rows) if (i < cols * rows)
unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, rows, step)); unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, step));
} }
} }
@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
{ {
// download scalar to constant memory // download scalar to constant memory
float data[4]; float data[4];
data[0] = scalar[0]; data[0] = static_cast<float>(scalar[0]);
data[1] = scalar[1]; data[1] = static_cast<float>(scalar[1]);
data[2] = scalar[2]; data[2] = static_cast<float>(scalar[2]);
data[3] = scalar[3]; data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256,1,1); dim3 threadsPerBlock(256,1,1);
@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels) extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
{ {
float data[4]; float data[4];
data[0] = scalar[0]; data[0] = static_cast<float>(scalar[0]);
data[1] = scalar[1]; data[1] = static_cast<float>(scalar[1]);
data[2] = scalar[2]; data[2] = static_cast<float>(scalar[2]);
data[3] = scalar[3]; data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data))); cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256, 1, 1); dim3 threadsPerBlock(256, 1, 1);

View File

@ -21,6 +21,8 @@ class CV_GpuMatOpSetTo : public CvTest
void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat"); void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat");
void run(int); void run(int);
bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
bool test_cv_8u_c1(); bool test_cv_8u_c1();
bool test_cv_8u_c2(); bool test_cv_8u_c2();
bool test_cv_8u_c3(); bool test_cv_8u_c3();
@ -34,15 +36,15 @@ class CV_GpuMatOpSetTo : public CvTest
bool test_cv_32f_c4(); bool test_cv_32f_c4();
private: private:
int w; int rows;
int h; int cols;
Scalar s; Scalar s;
}; };
CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" ) CV_GpuMatOpSetTo::CV_GpuMatOpSetTo(): CvTest( "GpuMatOperatorSetTo", "setTo" )
{ {
w = 100; rows = 127;
h = 100; cols = 129;
s.val[0] = 128.0; s.val[0] = 128.0;
s.val[1] = 128.0; s.val[1] = 128.0;
@ -66,13 +68,16 @@ void CV_GpuMatOpSetTo::print_mat(gpu::GpuMat & mat, std::string name)
print_mat(newmat, name); print_mat(newmat, name);
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c1() bool CV_GpuMatOpSetTo::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
{ {
Mat cpumat(w, h, CV_8U, Scalar::all(0)); //int64 time = getTickCount();
GpuMat gpumat(cpumat);
cpumat.setTo(s); cpumat.setTo(s);
//int64 time1 = getTickCount();
gpumat.setTo(s); gpumat.setTo(s);
//int64 time2 = getTickCount();
//std::cout << "\ntime cpu:" << double((time1 - time) / getTickFrequency());
//std::cout << "\ntime gpu:" << double((time2 - time1) / getTickFrequency());
//std::cout << "\n";
#ifdef PRINT_MATRIX #ifdef PRINT_MATRIX
print_mat(cpumat); print_mat(cpumat);
@ -82,7 +87,7 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
double ret = norm(cpumat, gpumat); double ret = norm(cpumat, gpumat);
if (ret < 0.1) if (ret < 1.0)
return true; return true;
else else
{ {
@ -91,205 +96,78 @@ bool CV_GpuMatOpSetTo::test_cv_8u_c1()
} }
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c1()
{
Mat cpumat(rows, cols, CV_8U, Scalar::all(0));
GpuMat gpumat(cpumat);
return compare_matrix(cpumat, gpumat);
}
bool CV_GpuMatOpSetTo::test_cv_8u_c2() bool CV_GpuMatOpSetTo::test_cv_8u_c2()
{ {
Mat cpumat(w, h, CV_8UC2, Scalar::all(0)); Mat cpumat(rows, cols, CV_8UC2, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c3() bool CV_GpuMatOpSetTo::test_cv_8u_c3()
{ {
Mat cpumat(w, h, CV_8UC3, Scalar::all(0)); Mat cpumat(rows, cols, CV_8UC3, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_8u_c4() bool CV_GpuMatOpSetTo::test_cv_8u_c4()
{ {
Mat cpumat(w, h, CV_8UC4, Scalar::all(0)); Mat cpumat(rows, cols, CV_8UC4, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_16u_c4() bool CV_GpuMatOpSetTo::test_cv_16u_c4()
{ {
Mat cpumat(w, h, CV_16UC4, Scalar::all(0)); Mat cpumat(rows, cols, CV_16UC4, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c1() bool CV_GpuMatOpSetTo::test_cv_32f_c1()
{ {
Mat cpumat(w, h, CV_32F, Scalar::all(0)); Mat cpumat(rows, cols, CV_32F, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c2() bool CV_GpuMatOpSetTo::test_cv_32f_c2()
{ {
Mat cpumat(w, h, CV_32FC2, Scalar::all(0)); Mat cpumat(rows, cols, CV_32FC2, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret;
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c3() bool CV_GpuMatOpSetTo::test_cv_32f_c3()
{ {
Mat cpumat(w, h, CV_32FC3, Scalar::all(0)); Mat cpumat(rows, cols, CV_32FC3, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret;
return false;
}
} }
bool CV_GpuMatOpSetTo::test_cv_32f_c4() bool CV_GpuMatOpSetTo::test_cv_32f_c4()
{ {
Mat cpumat(w, h, CV_32FC4, Scalar::all(0)); Mat cpumat(rows, cols, CV_32FC4, Scalar::all(0));
GpuMat gpumat(cpumat); GpuMat gpumat(cpumat);
cpumat.setTo(s); return compare_matrix(cpumat, gpumat);
gpumat.setTo(s);
#ifdef PRINT_MATRIX
print_mat(cpumat);
print_mat(gpumat);
cv::waitKey(0);
#endif
double ret = norm(cpumat, gpumat);
if (ret < 0.1)
return true;
else
{
std::cout << "return : " << ret << "\n";
return false;
}
} }
void CV_GpuMatOpSetTo::run( int /* start_from */) void CV_GpuMatOpSetTo::run( int /* start_from */)