minor refactoring of gpu module
This commit is contained in:
parent
9f80317ffa
commit
79b1eec3d3
@ -1510,33 +1510,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Host-side launcher: runs sum_kernel over `src` and returns one value of type T
// copied back from the scratch buffer.
//
// src - 2D matrix of T passed by value to the kernel; its cols/rows drive the
//       launch configuration computed by estimate_thread_cfg.
// buf - scratch buffer; row 0 is reinterpreted as T*. It is handed to the kernel
//       and later read with a device-to-host copy, so it has to be device memory
//       holding at least sizeof(T) bytes.
//
// Returns the first T stored in `buf` once the kernel has finished
// (presumably the total left there by sum_kernel - confirm against the kernel).
template <typename T>
T sum_caller(const DevMem2D_<T> src, PtrStep buf)
{
    dim3 threads, grid;
    estimate_thread_cfg(src.cols, src.rows, threads, grid);
    set_kernel_consts(src.cols, src.rows, threads, grid);

    T* buf_ = (T*)buf.ptr(0);

    sum_kernel<T, threads_x * threads_y><<<grid, threads>>>(src, buf_);
    // A kernel launch returns no status of its own; a rejected launch (for example an
    // invalid grid/block configuration) is reported through cudaGetLastError().
    // Check it right away so a launch that never ran is not mistaken for a valid result.
    cudaSafeCall(cudaGetLastError());
    // Block until the kernel has finished; execution errors surface here.
    // NOTE(review): cudaThreadSynchronize is deprecated in newer CUDA toolkits in favour
    // of cudaDeviceSynchronize; left unchanged to stay compatible with the rest of the file.
    cudaSafeCall(cudaThreadSynchronize());

    T sum;
    cudaSafeCall(cudaMemcpy(&sum, buf_, sizeof(T), cudaMemcpyDeviceToHost));

    return sum;
}
|
|
||||||
|
|
||||||
// Explicit instantiations of sum_caller, one per element type listed below
// (unsigned char, char, unsigned short, short, int, float, double), so that the
// template definitions are emitted in this translation unit.
// NOTE(review): presumably these are the only element types callers outside this
// file link against - confirm against the host-side dispatch code.
template unsigned char sum_caller<unsigned char>(const DevMem2D_<unsigned char>, PtrStep);
|
|
||||||
template char sum_caller<char>(const DevMem2D_<char>, PtrStep);
|
|
||||||
template unsigned short sum_caller<unsigned short>(const DevMem2D_<unsigned short>, PtrStep);
|
|
||||||
template short sum_caller<short>(const DevMem2D_<short>, PtrStep);
|
|
||||||
template int sum_caller<int>(const DevMem2D_<int>, PtrStep);
|
|
||||||
template float sum_caller<float>(const DevMem2D_<float>, PtrStep);
|
|
||||||
template double sum_caller<double>(const DevMem2D_<double>, PtrStep);
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, int nthreads>
|
template <typename T, int nthreads>
|
||||||
__global__ void sum_pass2_kernel(T* result, int size)
|
__global__ void sum_pass2_kernel(T* result, int size)
|
||||||
{
|
{
|
||||||
@ -1550,10 +1523,14 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
result[0] = smem[0];
|
result[0] = smem[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace sum
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T sum_multipass_caller(const DevMem2D_<T> src, PtrStep buf)
|
T sum_multipass_caller(const DevMem2D_<T> src, PtrStep buf)
|
||||||
{
|
{
|
||||||
|
using namespace sum;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimate_thread_cfg(src.cols, src.rows, threads, grid);
|
estimate_thread_cfg(src.cols, src.rows, threads, grid);
|
||||||
set_kernel_consts(src.cols, src.rows, threads, grid);
|
set_kernel_consts(src.cols, src.rows, threads, grid);
|
||||||
@ -1578,5 +1555,32 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template int sum_multipass_caller<int>(const DevMem2D_<int>, PtrStep);
|
template int sum_multipass_caller<int>(const DevMem2D_<int>, PtrStep);
|
||||||
template float sum_multipass_caller<float>(const DevMem2D_<float>, PtrStep);
|
template float sum_multipass_caller<float>(const DevMem2D_<float>, PtrStep);
|
||||||
|
|
||||||
} // namespace sum
|
|
||||||
|
// Host-side launcher: runs sum::sum_kernel over `src` and returns one value of type T
// copied back from the scratch buffer.
//
// src - 2D matrix of T passed by value to the kernel; its cols/rows drive the
//       launch configuration computed by estimate_thread_cfg.
// buf - scratch buffer; row 0 is reinterpreted as T*. It is handed to the kernel
//       and later read with a device-to-host copy, so it has to be device memory
//       holding at least sizeof(T) bytes.
//
// Returns the first T stored in `buf` once the kernel has finished
// (presumably the total left there by sum_kernel - confirm against the kernel).
template <typename T>
T sum_caller(const DevMem2D_<T> src, PtrStep buf)
{
    // Helpers and the kernel used below are looked up in namespace sum.
    using namespace sum;

    dim3 threads, grid;
    estimate_thread_cfg(src.cols, src.rows, threads, grid);
    set_kernel_consts(src.cols, src.rows, threads, grid);

    T* buf_ = (T*)buf.ptr(0);

    sum_kernel<T, threads_x * threads_y><<<grid, threads>>>(src, buf_);
    // A kernel launch returns no status of its own; a rejected launch (for example an
    // invalid grid/block configuration) is reported through cudaGetLastError().
    // Check it right away so a launch that never ran is not mistaken for a valid result.
    cudaSafeCall(cudaGetLastError());
    // Block until the kernel has finished; execution errors surface here.
    // NOTE(review): cudaThreadSynchronize is deprecated in newer CUDA toolkits in favour
    // of cudaDeviceSynchronize; left unchanged to stay compatible with the rest of the file.
    cudaSafeCall(cudaThreadSynchronize());

    T sum;
    cudaSafeCall(cudaMemcpy(&sum, buf_, sizeof(T), cudaMemcpyDeviceToHost));

    return sum;
}
|
||||||
|
|
||||||
|
// Explicit instantiations of sum_caller, one per element type listed below
// (unsigned char, char, unsigned short, short, int, float, double), so that the
// template definitions are emitted in this translation unit.
// NOTE(review): presumably these are the only element types callers outside this
// file link against - confirm against the host-side dispatch code.
template unsigned char sum_caller<unsigned char>(const DevMem2D_<unsigned char>, PtrStep);
|
||||||
|
template char sum_caller<char>(const DevMem2D_<char>, PtrStep);
|
||||||
|
template unsigned short sum_caller<unsigned short>(const DevMem2D_<unsigned short>, PtrStep);
|
||||||
|
template short sum_caller<short>(const DevMem2D_<short>, PtrStep);
|
||||||
|
template int sum_caller<int>(const DevMem2D_<int>, PtrStep);
|
||||||
|
template float sum_caller<float>(const DevMem2D_<float>, PtrStep);
|
||||||
|
template double sum_caller<double>(const DevMem2D_<double>, PtrStep);
|
||||||
}}}
|
}}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user