From 0aaaad1ea87eb719044e3c7b125965f934e7a636 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 21 Sep 2011 08:58:54 +0000 Subject: [PATCH] implemented gpu::addWeighted --- modules/gpu/include/opencv2/gpu/gpu.hpp | 4 + modules/gpu/perf/perf_arithm.cpp | 31 ++ modules/gpu/src/cuda/element_operations.cu | 322 +++++++++++++ modules/gpu/src/element_operations.cpp | 506 ++++++++++++++++++++- modules/gpu/test/test_arithm.cpp | 96 +++- 5 files changed, 943 insertions(+), 16 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index dbb110741..7377f5257 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -593,6 +593,10 @@ namespace cv //! computes per-element maximum of array and scalar (dst = max(src1, src2)) CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays + CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, + int dtype = -1, Stream& stream = Stream::Null()); + ////////////////////////////// Image processing ////////////////////////////// diff --git a/modules/gpu/perf/perf_arithm.cpp b/modules/gpu/perf/perf_arithm.cpp index f5c4da3b0..2c915904c 100644 --- a/modules/gpu/perf/perf_arithm.cpp +++ b/modules/gpu/perf/perf_arithm.cpp @@ -685,3 +685,34 @@ PERF_TEST_P(DevInfo_Size_MatType, countNonZero, testing::Combine(testing::Values SANITY_CHECK(dst); } + +PERF_TEST_P(DevInfo_Size_MatType, addWeighted, testing::Combine(testing::ValuesIn(devices()), + testing::Values(GPU_TYPICAL_MAT_SIZES), + testing::Values(CV_8UC1, CV_16UC1, CV_32FC1))) +{ + DeviceInfo devInfo = std::tr1::get<0>(GetParam()); + Size size = std::tr1::get<1>(GetParam()); + int type = std::tr1::get<2>(GetParam()); + + setDevice(devInfo.deviceID()); + + Mat src1_host(size, type); + Mat src2_host(size, type); + + 
declare.in(src1_host, src2_host, WARMUP_RNG); + + GpuMat src1(src1_host); + GpuMat src2(src2_host); + GpuMat dst(size, type); + + declare.time(0.5).iterations(100); + + SIMPLE_TEST_CYCLE() + { + addWeighted(src1, 0.5, src2, 0.5, 0.0, dst); + } + + Mat dst_host = dst; + + SANITY_CHECK(dst_host); +} diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 71574b734..7548e901f 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -757,4 +757,326 @@ namespace cv { namespace gpu { namespace device } template void multiplyScalar_gpu(const DevMem2D& src, float scale, const DevMem2D& dst, cudaStream_t stream); + + + ////////////////////////////////////////////////////////////////////////// + // addWeighted + + template struct AddWeighted : binary_function + { + __host__ __device__ __forceinline__ AddWeighted(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {} + + __device__ __forceinline__ D operator ()(typename TypeTraits::ParameterType a, typename TypeTraits::ParameterType b) const + { + return saturate_cast(alpha * a + beta * b + gamma); + } + + const double alpha; + const double beta; + const double gamma; + }; + + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_shift = 4 }; + }; + template <> 
struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_shift = 4 }; + }; + + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 4 }; + }; + template <> struct TransformFunctorTraits< AddWeighted > : DefaultTransformFunctorTraits< AddWeighted > + { + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 4 }; + }; + + template + void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream) + { + cudaSafeCall( cudaSetDoubleForDevice(&alpha) ); + cudaSafeCall( cudaSetDoubleForDevice(&beta) ); + cudaSafeCall( cudaSetDoubleForDevice(&gamma) ); + + AddWeighted op(alpha, beta, gamma); + + transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), static_cast< DevMem2D_ >(dst), op, stream); + } + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + 
template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const 
DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t 
stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double 
alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& 
dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& 
src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const 
DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const 
DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double 
gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void 
addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& 
src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + 
template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const 
DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, 
cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& 
src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const 
DevMem2D& dst, cudaStream_t stream); + + + + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + template void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); }}} diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 68724fe66..4cf857ed4 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -67,7 +67,8 @@ void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_nogpu(); return 0.0;} -void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, 
int, Stream&) { throw_nogpu(); } #else @@ -248,7 +249,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& { dst.create(src.size(), src.type()); - device::multiplyScalar_gpu(src.reshape(1), (float)(sc[0]), dst, StreamAccessor::getStream(stream)); + device::multiplyScalar_gpu(src.reshape(1), (float)(sc[0]), dst, StreamAccessor::getStream(stream)); } else { @@ -733,7 +734,7 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream); static const func_t funcs[] = { - min_caller, min_caller, min_caller, min_caller, min_caller, + min_caller, min_caller, min_caller, min_caller, min_caller, min_caller, min_caller }; funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream)); @@ -746,7 +747,7 @@ void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream) typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream); static const func_t funcs[] = { - min_caller, min_caller, min_caller, min_caller, min_caller, + min_caller, min_caller, min_caller, min_caller, min_caller, min_caller, min_caller }; funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream)); @@ -761,7 +762,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream); static const func_t funcs[] = { - max_caller, max_caller, max_caller, max_caller, max_caller, + max_caller, max_caller, max_caller, max_caller, max_caller, max_caller, max_caller }; funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream)); @@ -775,7 +776,7 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream) typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream); static const func_t funcs[] = { - max_caller, max_caller, 
max_caller, max_caller, max_caller, + max_caller, max_caller, max_caller, max_caller, max_caller, max_caller, max_caller }; funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream)); @@ -875,4 +876,497 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream)); } +//////////////////////////////////////////////////////////////////////// +// addWeighted + +namespace cv { namespace gpu { namespace device +{ + template + void addWeighted_gpu(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); +}}} + +void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream) +{ + CV_Assert(src1.size() == src2.size()); + CV_Assert(src1.type() == src2.type() || (dtype >= 0 && src1.channels() == src2.channels())); + + dtype = dtype >= 0 ? 
CV_MAKETYPE(dtype, src1.channels()) : src1.type(); + + dst.create(src1.size(), dtype); + + const GpuMat* psrc1 = &src1; + const GpuMat* psrc2 = &src2; + + if (src1.depth() > src2.depth()) + { + std::swap(psrc1, psrc2); + std::swap(alpha, beta); + } + + typedef void (*caller_t)(const DevMem2D& src1, double alpha, const DevMem2D& src2, double beta, double gamma, const DevMem2D& dst, cudaStream_t stream); + + using namespace cv::gpu::device; + + static const caller_t callers[7][7][7] = + { + { + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + }, + { + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + 
addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + }, + { + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + }, + { + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 
0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + }, + { + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + }, + { + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ 
+ }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + }, + { + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 
0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/, + 0/*addWeighted_gpu*/ + }, + { + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu, + addWeighted_gpu + } + } + }; + + callers[psrc1->depth()][psrc2->depth()][dst.depth()](psrc1->reshape(1), alpha, psrc2->reshape(1), beta, gamma, dst.reshape(1), StreamAccessor::getStream(stream)); +} + #endif diff --git a/modules/gpu/test/test_arithm.cpp b/modules/gpu/test/test_arithm.cpp index a8fcccdca..31365ca1f 100644 --- a/modules/gpu/test/test_arithm.cpp +++ b/modules/gpu/test/test_arithm.cpp @@ -1135,7 +1135,7 @@ TEST_P(MinMax, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); double minVal, maxVal; @@ -1216,7 +1216,7 @@ TEST_P(MinMaxLoc, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); double minVal, maxVal; @@ -1281,7 +1281,7 @@ TEST_P(CountNonZero, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); int n; @@ -1333,7 +1333,7 @@ TEST_P(Sum, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); cv::Scalar sum; @@ -1385,7 +1385,7 @@ TEST_P(AbsSum, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); cv::Scalar sum; @@ -1439,7 +1439,7 @@ TEST_P(SqrSum, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); cv::Scalar sum; @@ -1500,7 +1500,7 @@ TEST_P(BitwiseNot, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); cv::Mat dst; @@ -1564,7 +1564,7 @@ TEST_P(BitwiseOr, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); cv::Mat dst; @@ -1628,7 +1628,7 @@ TEST_P(BitwiseAnd, Accuracy) return; PRINT_PARAM(devInfo); - PRINT_TYPE(type) + PRINT_TYPE(type); PRINT_PARAM(size); cv::Mat dst; @@ -1692,7 +1692,7 @@ 
TEST_P(BitwiseXor, Accuracy)
         return;

     PRINT_PARAM(devInfo);
-    PRINT_TYPE(type)
+    PRINT_TYPE(type);
     PRINT_PARAM(size);

     cv::Mat dst;
@@ -1712,4 +1712,80 @@ INSTANTIATE_TEST_CASE_P(Arithm, BitwiseXor, testing::Combine(
                         testing::ValuesIn(devices()),
                         testing::ValuesIn(all_types())));

+//////////////////////////////////////////////////////////////////////////////
+// addWeighted: checks cv::gpu::addWeighted against the cv::addWeighted reference (dst_gold)
+
+struct AddWeighted : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> > // (device, src1 type, src2 type, dst type)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type1;
+    int type2;
+    int dtype;
+
+    cv::Size size;
+    cv::Mat src1;
+    cv::Mat src2;
+    double alpha;
+    double beta;
+    double gamma;
+
+    cv::Mat dst_gold;
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type1 = std::tr1::get<1>(GetParam());
+        type2 = std::tr1::get<2>(GetParam());
+        dtype = std::tr1::get<3>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+
+        src1 = cvtest::randomMat(rng, size, type1, 0.0, 255.0, false);
+        src2 = cvtest::randomMat(rng, size, type2, 0.0, 255.0, false);
+
+        alpha = rng.uniform(-10.0, 10.0);
+        beta = rng.uniform(-10.0, 10.0);
+        gamma = rng.uniform(-10.0, 10.0);
+
+        cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dtype);
+    }
+};
+
+TEST_P(AddWeighted, Accuracy)
+{
+    if ((src1.depth() == CV_64F || src2.depth() == CV_64F || dst_gold.depth() == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return;
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type1);
+    PRINT_TYPE(type2);
+    PRINT_TYPE(dtype);
+    PRINT_PARAM(size);
+    PRINT_PARAM(alpha);
+    PRINT_PARAM(beta);
+    PRINT_PARAM(gamma);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+
+        cv::gpu::addWeighted(cv::gpu::GpuMat(src1), alpha, cv::gpu::GpuMat(src2), beta, gamma, dev_dst, dtype);
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, dtype < CV_32F ? 1.0 : 1e-3); // inputs up to 255 with weights up to 10 give results in the thousands; 1e-12 is below float rounding at that scale
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)),
+                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)),
+                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1))));
+
 #endif // HAVE_CUDA