added absSum function
This commit is contained in:
parent
9b4c682623
commit
ae529f4bc6
@ -766,6 +766,14 @@ namespace cv
|
|||||||
//! supports only single channel images
|
//! supports only single channel images
|
||||||
CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
|
CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
|
||||||
|
|
||||||
|
//! computes sum of array elements absolute values
|
||||||
|
//! supports only single channel images
|
||||||
|
CV_EXPORTS Scalar absSum(const GpuMat& src);
|
||||||
|
|
||||||
|
//! computes sum of array elements absolute values
|
||||||
|
//! supports only single channel images
|
||||||
|
CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
|
||||||
|
|
||||||
//! computes squared sum of array elements
|
//! computes squared sum of array elements
|
||||||
//! supports only single channel images
|
//! supports only single channel images
|
||||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src);
|
CV_EXPORTS Scalar sqrSum(const GpuMat& src);
|
||||||
|
@ -953,6 +953,12 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename R>
|
template <typename R>
|
||||||
struct IdentityOp { static __device__ R call(R x) { return x; } };
|
struct IdentityOp { static __device__ R call(R x) { return x; } };
|
||||||
|
|
||||||
|
// Element operation for the sum kernels: maps a value of the accumulator
// type R to its absolute value.
template <typename R>
struct AbsOp
{
    __device__ static R call(R value)
    {
        return abs(value);
    }
};

// Specialization for unsigned accumulators: the value is returned unchanged,
// since an unsigned value is already its own absolute value.
template <>
struct AbsOp<uint>
{
    __device__ static uint call(uint value)
    {
        return value;
    }
};
|
||||||
|
|
||||||
template <typename R>
|
template <typename R>
|
||||||
struct SqrOp { static __device__ R call(R x) { return x * x; } };
|
struct SqrOp { static __device__ R call(R x) { return x * x; } };
|
||||||
|
|
||||||
@ -1509,6 +1515,110 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template void sumCaller<float>(const DevMem2D, PtrStep, double*, int);
|
template void sumCaller<float>(const DevMem2D, PtrStep, double*, int);
|
||||||
|
|
||||||
|
|
||||||
|
// Computes the per-channel sum of absolute values of `src` using two kernel
// launches (no atomics involved on the host side of this function).
//
// The first launch runs sumKernel / sumKernel_C{2,3,4} with AbsOp<R> over the
// grid chosen by estimateThreadCfg and writes into `buf`; the second launch
// runs the matching sumPass2Kernel in a single block over grid.x * grid.y
// entries of `buf` (presumably one partial result per first-pass block).
//
// Parameters:
//   src - source image data on the device.
//   buf - device scratch buffer; the final cn values of type R are read back
//         from its first row.
//   sum - host output array of 4 doubles; entries past cn are set to 0.
//   cn  - number of channels. Must be in [1, 4]: other values launch no
//         kernel, and a value above 4 would make the device-to-host copy
//         below write past the end of `result`.
template <typename T>
void absSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
{
    using namespace sums;
    typedef typename SumType<T>::R R;

    dim3 threads, grid;
    estimateThreadCfg(src.cols, src.rows, threads, grid);
    setKernelConsts(src.cols, src.rows, threads, grid);

    switch (cn)
    {
    case 1:
        sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
        sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
        break;
    case 2:
        sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
        sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
        break;
    case 3:
        sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
        sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
        break;
    case 4:
        sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
        sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
        break;
    }

    // Kernel launches do not return a status themselves: a bad launch
    // configuration is only visible through cudaGetLastError(), so check it
    // explicitly before waiting for the kernels to finish.
    cudaSafeCall(cudaGetLastError());
    cudaSafeCall(cudaThreadSynchronize());

    // Zero-initialised so that channels beyond cn are reported as 0.
    R result[4] = {0, 0, 0, 0};
    cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));

    // Widen the accumulator type R to double for the caller.
    for (int i = 0; i < 4; ++i)
        sum[i] = result[i];
}

template void absSumMultipassCaller<uchar>(const DevMem2D, PtrStep, double*, int);
template void absSumMultipassCaller<char>(const DevMem2D, PtrStep, double*, int);
template void absSumMultipassCaller<ushort>(const DevMem2D, PtrStep, double*, int);
template void absSumMultipassCaller<short>(const DevMem2D, PtrStep, double*, int);
template void absSumMultipassCaller<int>(const DevMem2D, PtrStep, double*, int);
template void absSumMultipassCaller<float>(const DevMem2D, PtrStep, double*, int);
|
||||||
|
|
||||||
|
|
||||||
|
// Computes the per-channel sum of absolute values of `src` with a single
// kernel launch per call.
//
// Runs sumKernel / sumKernel_C{2,3,4} with AbsOp<R> over the grid chosen by
// estimateThreadCfg, then reads the final cn values of type R back from the
// start of `buf`. NOTE(review): unlike the multipass variant there is no
// second reduction launch here, so the kernel itself presumably finishes the
// reduction (the host side selects this caller only when atomics are
// available) - confirm against the kernel implementation.
//
// Parameters:
//   src - source image data on the device.
//   buf - device scratch buffer; results are read back from its first row.
//   sum - host output array of 4 doubles; entries past cn are set to 0.
//   cn  - number of channels. Must be in [1, 4]: other values launch no
//         kernel, and a value above 4 would make the device-to-host copy
//         below write past the end of `result`.
template <typename T>
void absSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
{
    using namespace sums;
    typedef typename SumType<T>::R R;

    dim3 threads, grid;
    estimateThreadCfg(src.cols, src.rows, threads, grid);
    setKernelConsts(src.cols, src.rows, threads, grid);

    switch (cn)
    {
    case 1:
        sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
        break;
    case 2:
        sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
        break;
    case 3:
        sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
        break;
    case 4:
        sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
        break;
    }

    // Kernel launches do not return a status themselves: a bad launch
    // configuration is only visible through cudaGetLastError(), so check it
    // explicitly before waiting for the kernel to finish.
    cudaSafeCall(cudaGetLastError());
    cudaSafeCall(cudaThreadSynchronize());

    // Zero-initialised so that channels beyond cn are reported as 0.
    R result[4] = {0, 0, 0, 0};
    cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));

    // Widen the accumulator type R to double for the caller.
    for (int i = 0; i < 4; ++i)
        sum[i] = result[i];
}

template void absSumCaller<uchar>(const DevMem2D, PtrStep, double*, int);
template void absSumCaller<char>(const DevMem2D, PtrStep, double*, int);
template void absSumCaller<ushort>(const DevMem2D, PtrStep, double*, int);
template void absSumCaller<short>(const DevMem2D, PtrStep, double*, int);
template void absSumCaller<int>(const DevMem2D, PtrStep, double*, int);
template void absSumCaller<float>(const DevMem2D, PtrStep, double*, int);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
|
void sqrSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
|
@ -52,6 +52,8 @@ double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
|||||||
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
|
||||||
Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }
|
Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }
|
||||||
Scalar cv::gpu::sum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
|
Scalar cv::gpu::sum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
|
||||||
|
// Fallback stub: reports the missing GPU support through throw_nogpu().
// The trailing return only satisfies the function's return type.
Scalar cv::gpu::absSum(const GpuMat&)
{
    throw_nogpu();
    return Scalar();
}
|
||||||
|
// Fallback stub for the overload taking a scratch buffer: reports the missing
// GPU support through throw_nogpu(). The trailing return only satisfies the
// function's return type.
Scalar cv::gpu::absSum(const GpuMat&, GpuMat&)
{
    throw_nogpu();
    return Scalar();
}
|
||||||
Scalar cv::gpu::sqrSum(const GpuMat&) { throw_nogpu(); return Scalar(); }
|
Scalar cv::gpu::sqrSum(const GpuMat&) { throw_nogpu(); return Scalar(); }
|
||||||
Scalar cv::gpu::sqrSum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
|
Scalar cv::gpu::sqrSum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
|
||||||
void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_nogpu(); }
|
void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_nogpu(); }
|
||||||
@ -128,6 +130,12 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void sumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
|
void sumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void absSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void absSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
|
void sqrSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
|
||||||
|
|
||||||
@ -166,7 +174,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
|||||||
|
|
||||||
Size buf_size;
|
Size buf_size;
|
||||||
sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
|
sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
|
||||||
buf_size.width, buf_size.height);
|
buf_size.width, buf_size.height);
|
||||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||||
|
|
||||||
Caller* callers = multipass_callers;
|
Caller* callers = multipass_callers;
|
||||||
@ -182,6 +190,47 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Convenience overload: computes the sum of absolute values of `src` using a
// temporary scratch buffer that lives only for the duration of the call.
Scalar cv::gpu::absSum(const GpuMat& src)
{
    GpuMat scratch;
    const Scalar total = cv::gpu::absSum(src, scratch);
    return total;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Computes the per-channel sum of absolute values of `src` on the GPU.
//
// Parameters:
//   src - source image; depths CV_8U..CV_32F (table entries 0..5) with 1 to 4
//         channels are dispatched, anything else raises CV_StsBadArg.
//   buf - scratch buffer, resized via ensureSizeIsEnough to the size reported
//         by sums::getBufSizeRequired; can be reused across calls.
//
// Returns a Scalar holding the four values produced by the selected caller.
Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
{
    using namespace mathfunc;

    typedef void (*Caller)(const DevMem2D, PtrStep, double*, int);

    // Both tables are indexed by src.depth(); a null entry marks an
    // unsupported depth.
    const int num_depths = 7;

    static Caller multipass_callers[num_depths] = {
            absSumMultipassCaller<unsigned char>, absSumMultipassCaller<char>,
            absSumMultipassCaller<unsigned short>, absSumMultipassCaller<short>,
            absSumMultipassCaller<int>, absSumMultipassCaller<float>, 0 };

    static Caller singlepass_callers[num_depths] = {
            absSumCaller<unsigned char>, absSumCaller<char>,
            absSumCaller<unsigned short>, absSumCaller<short>,
            absSumCaller<int>, absSumCaller<float>, 0 };

    // The callers only handle 1..4 channels and copy `cn` results into a
    // 4-element host array, so any other channel count must be rejected here
    // to avoid writing past that array.
    const int cn = src.channels();
    if (cn < 1 || cn > 4)
        CV_Error(CV_StsBadArg, "absSum: unsupported number of channels");

    Size buf_size;
    sums::getBufSizeRequired(src.cols, src.rows, cn,
                             buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    // Use the single-pass callers only when atomics are available both in the
    // build and on the current device; otherwise fall back to multipass.
    Caller* callers = multipass_callers;
    if (TargetArchs::builtWith(ATOMICS) && DeviceInfo().has(ATOMICS))
        callers = singlepass_callers;

    // Guard the table lookup so an out-of-range depth is reported as an
    // unsupported type instead of reading past the end of the table.
    const int depth = src.depth();
    Caller caller = (depth >= 0 && depth < num_depths) ? callers[depth] : 0;
    if (!caller) CV_Error(CV_StsBadArg, "absSum: unsupported type");

    double result[4] = {0, 0, 0, 0};
    caller(src, buf, result, cn);
    return Scalar(result[0], result[1], result[2], result[3]);
}
|
||||||
|
|
||||||
|
|
||||||
Scalar cv::gpu::sqrSum(const GpuMat& src)
|
Scalar cv::gpu::sqrSum(const GpuMat& src)
|
||||||
{
|
{
|
||||||
GpuMat buf;
|
GpuMat buf;
|
||||||
@ -222,6 +271,9 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
|||||||
return Scalar(result[0], result[1], result[2], result[3]);
|
return Scalar(result[0], result[1], result[2], result[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Find min or max
|
// Find min or max
|
||||||
|
|
||||||
|
@ -956,6 +956,10 @@ struct CV_GpuSumTest: CvTest
|
|||||||
int typemax = CV_32F;
|
int typemax = CV_32F;
|
||||||
for (int type = CV_8U; type <= typemax; ++type)
|
for (int type = CV_8U; type <= typemax; ++type)
|
||||||
{
|
{
|
||||||
|
//
|
||||||
|
// sum
|
||||||
|
//
|
||||||
|
|
||||||
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
|
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
|
||||||
a = sum(src);
|
a = sum(src);
|
||||||
b = sum(GpuMat(src));
|
b = sum(GpuMat(src));
|
||||||
@ -965,6 +969,7 @@ struct CV_GpuSumTest: CvTest
|
|||||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 3), src);
|
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 3), src);
|
||||||
a = sum(src);
|
a = sum(src);
|
||||||
b = sum(GpuMat(src));
|
b = sum(GpuMat(src));
|
||||||
@ -974,6 +979,7 @@ struct CV_GpuSumTest: CvTest
|
|||||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 4), src);
|
gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 4), src);
|
||||||
a = sum(src);
|
a = sum(src);
|
||||||
b = sum(GpuMat(src));
|
b = sum(GpuMat(src));
|
||||||
@ -983,6 +989,7 @@ struct CV_GpuSumTest: CvTest
|
|||||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
gen(1 + rand() % 500, 1 + rand() % 500, type, src);
|
gen(1 + rand() % 500, 1 + rand() % 500, type, src);
|
||||||
a = sum(src);
|
a = sum(src);
|
||||||
b = sum(GpuMat(src));
|
b = sum(GpuMat(src));
|
||||||
@ -992,6 +999,25 @@ struct CV_GpuSumTest: CvTest
|
|||||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// absSum
|
||||||
|
//
|
||||||
|
|
||||||
|
gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
|
||||||
|
b = absSum(GpuMat(src));
|
||||||
|
a = norm(src, NORM_L1);
|
||||||
|
if (abs(a[0] - b[0]) > src.size().area() * max_err)
|
||||||
|
{
|
||||||
|
ts->printf(CvTS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
|
||||||
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// sqrSum
|
||||||
|
//
|
||||||
|
|
||||||
if (type != CV_8S)
|
if (type != CV_8S)
|
||||||
{
|
{
|
||||||
gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
|
gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user