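Fixed the implementation of gpumat::setTo(): GetIndex now takes the column offset from i % cols instead of i % rows, and its unused rows parameter is dropped.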

This commit is contained in:
Andrey Morozov
2010-07-21 13:35:41 +00:00
parent f37ac8e4ca
commit 2fe75ea33e
2 changed files with 54 additions and 174 deletions

View File

@@ -76,19 +76,21 @@ namespace mat_operators
};
// Converts a flat element index `i` into an offset, counted in units of T,
// into the matrix buffer.
//
//   i        - flat element index; i / cols selects the row, i % cols the column.
//   cols     - number of elements per row.
//   step     - row stride; it is divided by sizeof(T) below, so it is presumably
//              expressed in bytes (TODO confirm against the caller).
//   channels - template parameter; the column index is multiplied by it, i.e.
//              each element occupies `channels` consecutive T values.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so the
// removed and the added lines appear back to back. The 4-argument signature and
// the `i % rows` return statement look like the removed version; the 3-argument
// signature and the `i % cols` / `ret` statements look like the added version.
// Confirm against the actual commit before treating this text as source.
template <typename T, int channels>
__device__ size_t GetIndex(size_t i, int cols, int rows, int step)
__device__ size_t GetIndex(size_t i, int cols, int step)
{
// Variant using `i % rows` for the column term: since the row is i / cols, the
// remainder inside that row is i % cols, so this form only agrees with the one
// below when rows == cols.
return ((i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T))) +
(i % static_cast<size_t>(rows))*static_cast<size_t>(channels) ;
// Variant using `i % cols`: row offset (row * step / sizeof(T)) plus the
// column offset (column * channels).
size_t ret = (i / static_cast<size_t>(cols))*static_cast<size_t>(step) / static_cast<size_t>(sizeof(T)) +
(i % static_cast<size_t>(cols))*static_cast<size_t>(channels);
return ret;
}
// Kernel that handles one matrix element per thread: each thread with a flat
// index below cols * rows calls unroll<T, channels>::unroll_set on `mat` at the
// offset computed by GetIndex. What unroll_set stores is not visible in this
// excerpt (presumably the scalar uploaded by the host wrapper - TODO confirm).
//
// Only the .x components of blockIdx/blockDim/threadIdx are read, so the kernel
// expects a 1-D launch that provides at least cols * rows threads.
//
// NOTE(review): this listing is a rendered diff without +/- markers; the two
// unroll_set lines are the same call before and after the GetIndex signature
// lost its `rows` argument, not two consecutive calls.
template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step)
{
// NOTE(review): blockIdx.x * blockDim.x is evaluated in 32-bit unsigned
// arithmetic and only the result is widened to size_t.
size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
// Bounds guard for the tail of the grid.
// NOTE(review): cols * rows is an int multiplication that is converted to
// size_t for the comparison; it would overflow for very large matrices.
if (i < cols * rows)
{
unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, rows, step));
unroll<T, channels>::unroll_set(mat, GetIndex<T,channels>(i, cols, step));
}
}
@@ -97,7 +99,7 @@ namespace mat_operators
{
size_t i = (blockIdx.x * blockDim.x + threadIdx.x);
if (i < cols * rows)
unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, rows, step));
unroll<T, channels>::unroll_set_with_mask(mat, mask[i], GetIndex<T,channels>(i, cols, step));
}
}
@@ -105,10 +107,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
{
// download scalar to constant memory
float data[4];
data[0] = scalar[0];
data[1] = scalar[1];
data[2] = scalar[2];
data[3] = scalar[3];
data[0] = static_cast<float>(scalar[0]);
data[1] = static_cast<float>(scalar[1]);
data[2] = static_cast<float>(scalar[2]);
data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256,1,1);
@@ -144,10 +146,10 @@ extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const doubl
extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int elemSize1, int channels)
{
float data[4];
data[0] = scalar[0];
data[1] = scalar[1];
data[2] = scalar[2];
data[3] = scalar[3];
data[0] = static_cast<float>(scalar[0]);
data[1] = static_cast<float>(scalar[1]);
data[2] = static_cast<float>(scalar[2]);
data[3] = static_cast<float>(scalar[3]);
cudaSafeCall( cudaMemcpyToSymbol(scalar_d, &data, sizeof(data)));
dim3 threadsPerBlock(256, 1, 1);