Added support for multichannel images to gpu::sqrSum
This commit is contained in:
@@ -1989,7 +1989,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
|
||||
template <typename T>
|
||||
void sqsum_multipass_caller(const DevMem2D src, PtrStep buf, double* sum)
|
||||
void sqsum_multipass_caller(const DevMem2D src, PtrStep buf, double* sum, int cn)
|
||||
{
|
||||
using namespace sum;
|
||||
typedef typename SumType<T>::R R;
|
||||
@@ -1998,27 +1998,54 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
estimate_thread_cfg(src.cols, src.rows, threads, grid);
|
||||
set_kernel_consts(src.cols, src.rows, threads, grid);
|
||||
|
||||
sum_kernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
|
||||
sum_pass2_kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||
(typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||
switch (cn)
|
||||
{
|
||||
case 1:
|
||||
sum_kernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
|
||||
sum_pass2_kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||
(typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||
break;
|
||||
case 2:
|
||||
sum_kernel_C2<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
|
||||
sum_pass2_kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||
(typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||
break;
|
||||
case 3:
|
||||
sum_kernel_C3<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
|
||||
sum_pass2_kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||
(typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||
break;
|
||||
case 4:
|
||||
sum_kernel_C4<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
|
||||
sum_pass2_kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
|
||||
(typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
|
||||
break;
|
||||
}
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
|
||||
R result = 0;
|
||||
cudaSafeCall(cudaMemcpy(&result, buf.ptr(0), sizeof(R), cudaMemcpyDeviceToHost));
|
||||
sum[0] = result;
|
||||
R result[4] = {0, 0, 0, 0};
|
||||
cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
|
||||
|
||||
sum[0] = result[0];
|
||||
sum[1] = result[1];
|
||||
sum[2] = result[2];
|
||||
sum[3] = result[3];
|
||||
}
|
||||
|
||||
template void sqsum_multipass_caller<unsigned char>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_multipass_caller<char>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_multipass_caller<unsigned short>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_multipass_caller<short>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_multipass_caller<int>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_multipass_caller<float>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_multipass_caller<unsigned char>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_multipass_caller<char>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_multipass_caller<unsigned short>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_multipass_caller<short>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_multipass_caller<int>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_multipass_caller<float>(const DevMem2D, PtrStep, double*, int);
|
||||
|
||||
|
||||
template <typename T>
|
||||
void sqsum_caller(const DevMem2D src, PtrStep buf, double* sum)
|
||||
void sqsum_caller(const DevMem2D src, PtrStep buf, double* sum, int cn)
|
||||
{
|
||||
using namespace sum;
|
||||
typedef typename SumType<T>::R R;
|
||||
@@ -2027,20 +2054,42 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
estimate_thread_cfg(src.cols, src.rows, threads, grid);
|
||||
set_kernel_consts(src.cols, src.rows, threads, grid);
|
||||
|
||||
sum_kernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
|
||||
switch (cn)
|
||||
{
|
||||
case 1:
|
||||
sum_kernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
|
||||
break;
|
||||
case 2:
|
||||
sum_kernel_C2<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
|
||||
break;
|
||||
case 3:
|
||||
sum_kernel_C3<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
|
||||
break;
|
||||
case 4:
|
||||
sum_kernel_C4<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
|
||||
src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
|
||||
break;
|
||||
}
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
|
||||
R result = 0;
|
||||
cudaSafeCall(cudaMemcpy(&result, buf.ptr(0), sizeof(R), cudaMemcpyDeviceToHost));
|
||||
sum[0] = result;
|
||||
}
|
||||
R result[4] = {0, 0, 0, 0};
|
||||
cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
|
||||
|
||||
template void sqsum_caller<unsigned char>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_caller<char>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_caller<unsigned short>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_caller<short>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_caller<int>(const DevMem2D, PtrStep, double*);
|
||||
template void sqsum_caller<float>(const DevMem2D, PtrStep, double*);
|
||||
sum[0] = result[0];
|
||||
sum[1] = result[1];
|
||||
sum[2] = result[2];
|
||||
sum[3] = result[3];
|
||||
}
|
||||
|
||||
template void sqsum_caller<unsigned char>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_caller<char>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_caller<unsigned short>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_caller<short>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_caller<int>(const DevMem2D, PtrStep, double*, int);
|
||||
template void sqsum_caller<float>(const DevMem2D, PtrStep, double*, int);
|
||||
}}}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user