Merge pull request #1540 from jet47:gpuarithm-cudev

This commit is contained in:
Roman Donchenko
2013-10-21 16:34:44 +04:00
committed by OpenCV Buildbot
64 changed files with 5250 additions and 8808 deletions

View File

@@ -73,7 +73,7 @@
#include "cudev/block/vec_distance.hpp"
#include "cudev/grid/copy.hpp"
#include "cudev/grid/glob_reduce.hpp"
#include "cudev/grid/reduce.hpp"
#include "cudev/grid/histogram.hpp"
#include "cudev/grid/integral.hpp"
#include "cudev/grid/pyramids.hpp"

View File

@@ -47,7 +47,7 @@
#define __OPENCV_CUDEV_EXPR_REDUCTION_HPP__
#include "../common.hpp"
#include "../grid/glob_reduce.hpp"
#include "../grid/reduce.hpp"
#include "../grid/histogram.hpp"
#include "../grid/integral.hpp"
#include "../grid/reduce_to_vec.hpp"

View File

@@ -616,6 +616,30 @@ template <typename T> struct magnitude_func : binary_function<T, T, typename fun
}
};
template <typename T> struct magnitude_sqr_func : binary_function<T, T, typename functional_detail::FloatType<T>::type>
{
__device__ __forceinline__ typename functional_detail::FloatType<T>::type operator ()(typename TypeTraits<T>::parameter_type a, typename TypeTraits<T>::parameter_type b) const
{
return a * a + b * b;
}
};
template <typename T, bool angleInDegrees> struct direction_func : binary_function<T, T, T>
{
__device__ T operator ()(T x, T y) const
{
atan2_func<T> f;
typename atan2_func<T>::result_type angle = f(y, x);
angle += (angle < 0) * (2.0f * CV_PI_F);
if (angleInDegrees)
angle *= (180.0f / CV_PI_F);
return saturate_cast<T>(angle);
}
};
template <typename T> struct pow_func : binary_function<T, float, float>
{
__device__ __forceinline__ float operator ()(T val, float power) const

View File

@@ -594,7 +594,7 @@ namespace integral_detail
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
__host__ static void integral(const GlobPtr<uchar> src, GlobPtr<uint> dst, int rows, int cols, cudaStream_t stream)
__host__ static void integral(const GlobPtr<uchar>& src, const GlobPtr<uint>& dst, int rows, int cols, cudaStream_t stream)
{
if (deviceSupports(FEATURE_SET_COMPUTE_30)
&& (cols % 16 == 0)
@@ -614,7 +614,7 @@ namespace integral_detail
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
__host__ static void integral(const GlobPtr<uchar> src, GlobPtr<int> dst, int rows, int cols, cudaStream_t stream)
__host__ __forceinline__ void integral(const GlobPtr<uchar>& src, const GlobPtr<int>& dst, int rows, int cols, cudaStream_t stream)
{
GlobPtr<uint> dstui = globPtr((uint*) dst.data, dst.step);
integral(src, dstui, rows, cols, stream);

View File

@@ -0,0 +1,177 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma once
#ifndef __OPENCV_CUDEV_GRID_MINMAXLOC_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_MINMAXLOC_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/vec_traits.hpp"
#include "../../util/type_traits.hpp"
#include "../../util/limits.hpp"
#include "../../block/reduce.hpp"
namespace cv { namespace cudev {
namespace grid_minmaxloc_detail
{
template <int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
__global__ void minMaxLoc_pass_1(const SrcPtr src, ResType* minVal, ResType* maxVal, int* minLoc, int* maxLoc, const MaskPtr mask, const int rows, const int cols, const int patch_y, const int patch_x)
{
__shared__ ResType sMinVal[BLOCK_SIZE];
__shared__ ResType sMaxVal[BLOCK_SIZE];
__shared__ uint sMinLoc[BLOCK_SIZE];
__shared__ uint sMaxLoc[BLOCK_SIZE];
const int x0 = blockIdx.x * blockDim.x * patch_x + threadIdx.x;
const int y0 = blockIdx.y * blockDim.y * patch_y + threadIdx.y;
ResType myMin = numeric_limits<ResType>::max();
ResType myMax = -numeric_limits<ResType>::max();
int myMinLoc = -1;
int myMaxLoc = -1;
for (int i = 0, y = y0; i < patch_y && y < rows; ++i, y += blockDim.y)
{
for (int j = 0, x = x0; j < patch_x && x < cols; ++j, x += blockDim.x)
{
if (mask(y, x))
{
const ResType srcVal = src(y, x);
if (srcVal < myMin)
{
myMin = srcVal;
myMinLoc = y * cols + x;
}
if (srcVal > myMax)
{
myMax = srcVal;
myMaxLoc = y * cols + x;
}
}
}
}
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
blockReduceKeyVal<BLOCK_SIZE>(smem_tuple(sMinVal, sMaxVal), tie(myMin, myMax),
smem_tuple(sMinLoc, sMaxLoc), tie(myMinLoc, myMaxLoc),
tid,
make_tuple(less<ResType>(), greater<ResType>()));
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
if (tid == 0)
{
minVal[bid] = myMin;
maxVal[bid] = myMax;
minLoc[bid] = myMinLoc;
maxLoc[bid] = myMaxLoc;
}
}
template <int BLOCK_SIZE, typename T>
__global__ void minMaxLoc_pass_2(T* minMal, T* maxVal, int* minLoc, int* maxLoc, int count)
{
__shared__ T sMinVal[BLOCK_SIZE];
__shared__ T sMaxVal[BLOCK_SIZE];
__shared__ int sMinLoc[BLOCK_SIZE];
__shared__ int sMaxLoc[BLOCK_SIZE];
const int idx = ::min(threadIdx.x, count - 1);
T myMin = minMal[idx];
T myMax = maxVal[idx];
int myMinLoc = minLoc[idx];
int myMaxLoc = maxLoc[idx];
blockReduceKeyVal<BLOCK_SIZE>(smem_tuple(sMinVal, sMaxVal), tie(myMin, myMax),
smem_tuple(sMinLoc, sMaxLoc), tie(myMinLoc, myMaxLoc),
threadIdx.x,
make_tuple(less<T>(), greater<T>()));
if (threadIdx.x == 0)
{
minMal[0] = myMin;
maxVal[0] = myMax;
minLoc[0] = myMinLoc;
maxLoc[0] = myMaxLoc;
}
}
template <class Policy>
void getLaunchCfg(int rows, int cols, dim3& block, dim3& grid)
{
block = dim3(Policy::block_size_x, Policy::block_size_y);
grid = dim3(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y));
grid.x = ::min(grid.x, block.x);
grid.y = ::min(grid.y, block.y);
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void minMaxLoc(const SrcPtr& src, ResType* minVal, ResType* maxVal, int* minLoc, int* maxLoc, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
dim3 block, grid;
getLaunchCfg<Policy>(cols, rows, block, grid);
const int patch_x = divUp(divUp(cols, grid.x), block.x);
const int patch_y = divUp(divUp(rows, grid.y), block.y);
minMaxLoc_pass_1<Policy::block_size_x * Policy::block_size_y><<<grid, block, 0, stream>>>(src, minVal, maxVal, minLoc, maxLoc, mask, rows, cols, patch_y, patch_x);
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
minMaxLoc_pass_2<Policy::block_size_x * Policy::block_size_y><<<1, Policy::block_size_x * Policy::block_size_y, 0, stream>>>(minVal, maxVal, minLoc, maxLoc, grid.x * grid.y);
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
if (stream == 0)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
}
}}
#endif

View File

@@ -43,8 +43,8 @@
#pragma once
#ifndef __OPENCV_CUDEV_GRID_GLOB_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_GLOB_REDUCE_DETAIL_HPP__
#ifndef __OPENCV_CUDEV_GRID_REDUCE_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_REDUCE_DETAIL_HPP__
#include "../../common.hpp"
#include "../../util/tuple.hpp"
@@ -59,7 +59,7 @@
namespace cv { namespace cudev {
namespace grid_glob_reduce_detail
namespace grid_reduce_detail
{
// Unroll
@@ -389,7 +389,7 @@ namespace grid_glob_reduce_detail
// glob_reduce
template <class Reductor, int BLOCK_SIZE, int PATCH_X, int PATCH_Y, class SrcPtr, typename ResType, class MaskPtr>
__global__ void glob_reduce(const SrcPtr src, ResType* result, const MaskPtr mask, const int rows, const int cols)
__global__ void reduce(const SrcPtr src, ResType* result, const MaskPtr mask, const int rows, const int cols)
{
const int x0 = blockIdx.x * blockDim.x * PATCH_X + threadIdx.x;
const int y0 = blockIdx.y * blockDim.y * PATCH_Y + threadIdx.y;
@@ -413,14 +413,12 @@ namespace grid_glob_reduce_detail
}
template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void glob_reduce(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
__host__ void reduce(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
const dim3 block(Policy::block_size_x, Policy::block_size_y);
const dim3 grid(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y));
const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y;
glob_reduce<Reductor, BLOCK_SIZE, Policy::patch_size_x, Policy::patch_size_y><<<grid, block, 0, stream>>>(src, result, mask, rows, cols);
reduce<Reductor, Policy::block_size_x * Policy::block_size_y, Policy::patch_size_x, Policy::patch_size_y><<<grid, block, 0, stream>>>(src, result, mask, rows, cols);
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
if (stream == 0)
@@ -433,40 +431,33 @@ namespace grid_glob_reduce_detail
__host__ void sum(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
const int cn = VecTraits<src_type>::cn;
typedef typename MakeVec<ResType, cn>::type work_type;
typedef typename VecTraits<ResType>::elem_type res_elem_type;
glob_reduce<SumReductor<src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
reduce<SumReductor<src_type, ResType>, Policy>(src, (res_elem_type*) result, mask, rows, cols, stream);
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void minVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
const int cn = VecTraits<src_type>::cn;
typedef typename MakeVec<ResType, cn>::type work_type;
glob_reduce<MinMaxReductor<minop<work_type>, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
reduce<MinMaxReductor<minop<ResType>, src_type, ResType>, Policy>(src, result, mask, rows, cols, stream);
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void maxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
const int cn = VecTraits<src_type>::cn;
typedef typename MakeVec<ResType, cn>::type work_type;
glob_reduce<MinMaxReductor<maxop<work_type>, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
reduce<MinMaxReductor<maxop<ResType>, src_type, ResType>, Policy>(src, result, mask, rows, cols, stream);
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void minMaxVal(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
const int cn = VecTraits<src_type>::cn;
typedef typename MakeVec<ResType, cn>::type work_type;
glob_reduce<MinMaxReductor<both, src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
reduce<MinMaxReductor<both, src_type, ResType>, Policy>(src, result, mask, rows, cols, stream);
}
}

View File

@@ -54,12 +54,52 @@ namespace cv { namespace cudev {
namespace grid_reduce_to_vec_detail
{
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor, int cn> struct Reduce;
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 1>
{
__device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal)
{
typename Reductor::template rebind<work_elem_type>::other op;
blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op);
}
};
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 2>
{
__device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal)
{
typename Reductor::template rebind<work_elem_type>::other op;
blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op));
}
};
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 3>
{
__device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal)
{
typename Reductor::template rebind<work_elem_type>::other op;
blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op));
}
};
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 4>
{
__device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal)
{
typename Reductor::template rebind<work_elem_type>::other op;
blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVal.z, myVal.w), threadIdx.x, make_tuple(op, op, op, op));
}
};
template <class Reductor, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
__global__ void reduceToColumn(const SrcPtr src, ResType* dst, const MaskPtr mask, const int cols)
{
typedef typename Reductor::work_type work_type;
typedef typename VecTraits<work_type>::elem_type work_elem_type;
const int cn = VecTraits<work_type>::cn;
__shared__ work_type smem[BLOCK_SIZE];
__shared__ work_elem_type smem[cn][BLOCK_SIZE];
const int y = blockIdx.x;
@@ -75,7 +115,7 @@ namespace grid_reduce_to_vec_detail
}
}
blockReduce<BLOCK_SIZE>(smem, myVal, threadIdx.x, op);
Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, cn>::call(smem, myVal);
if (threadIdx.x == 0)
dst[y] = saturate_cast<ResType>(Reductor::result(myVal, cols));

View File

@@ -217,7 +217,7 @@ namespace grid_transform_detail
}
template <int SHIFT, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
__global__ void transformSmart(const GlobPtr<SrcType1> src1_, const GlobPtr<SrcType2> src2_, PtrStep<DstType> dst_, const BinOp op, const MaskPtr mask, const int rows, const int cols)
__global__ void transformSmart(const GlobPtr<SrcType1> src1_, const GlobPtr<SrcType2> src2_, GlobPtr<DstType> dst_, const BinOp op, const MaskPtr mask, const int rows, const int cols)
{
typedef typename MakeVec<SrcType1, SHIFT>::type read_type1;
typedef typename MakeVec<SrcType2, SHIFT>::type read_type2;
@@ -345,25 +345,25 @@ namespace grid_transform_detail
};
template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__host__ void transform(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
__host__ void transform_unary(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
TransformDispatcher<false, Policy>::call(src, dst, op, mask, rows, cols, stream);
}
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
__host__ void transform(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
__host__ void transform_binary(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
TransformDispatcher<false, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
}
template <class Policy, typename SrcType, typename DstType, class UnOp, class MaskPtr>
__host__ void transform(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
__host__ void transform_unary(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
TransformDispatcher<VecTraits<SrcType>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1, Policy>::call(src, dst, op, mask, rows, cols, stream);
}
template <class Policy, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
__host__ void transform(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
__host__ void transform_binary(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
{
TransformDispatcher<VecTraits<SrcType1>::cn == 1 && VecTraits<SrcType2>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
}

View File

@@ -55,15 +55,12 @@ namespace cv { namespace cudev {
namespace transpose_detail
{
const int TRANSPOSE_TILE_DIM = 16;
const int TRANSPOSE_BLOCK_ROWS = 16;
template <class SrcPtr, typename DstType>
template <int TILE_DIM, int BLOCK_DIM_Y, class SrcPtr, typename DstType>
__global__ void transpose(const SrcPtr src, GlobPtr<DstType> dst, const int rows, const int cols)
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
__shared__ src_type tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1];
__shared__ src_type tile[TILE_DIM][TILE_DIM + 1];
int blockIdx_x, blockIdx_y;
@@ -80,12 +77,12 @@ namespace transpose_detail
blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
}
int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
int xIndex = blockIdx_x * TILE_DIM + threadIdx.x;
int yIndex = blockIdx_y * TILE_DIM + threadIdx.y;
if (xIndex < cols)
{
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
for (int i = 0; i < TILE_DIM; i += BLOCK_DIM_Y)
{
if (yIndex + i < rows)
{
@@ -96,12 +93,12 @@ namespace transpose_detail
__syncthreads();
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
xIndex = blockIdx_y * TILE_DIM + threadIdx.x;
yIndex = blockIdx_x * TILE_DIM + threadIdx.y;
if (xIndex < rows)
{
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
for (int i = 0; i < TILE_DIM; i += BLOCK_DIM_Y)
{
if (yIndex + i < cols)
{
@@ -111,13 +108,13 @@ namespace transpose_detail
}
}
template <class SrcPtr, typename DstType>
template <class Policy, class SrcPtr, typename DstType>
__host__ void transpose(const SrcPtr& src, const GlobPtr<DstType>& dst, int rows, int cols, cudaStream_t stream)
{
const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);
const dim3 block(Policy::tile_dim, Policy::block_dim_y);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
transpose<<<grid, block, 0, stream>>>(src, dst, rows, cols);
transpose<Policy::tile_dim, Policy::block_dim_y><<<grid, block, 0, stream>>>(src, dst, rows, cols);
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
if (stream == 0)

View File

@@ -43,8 +43,8 @@
#pragma once
#ifndef __OPENCV_CUDEV_GRID_GLOB_REDUCE_HPP__
#define __OPENCV_CUDEV_GRID_GLOB_REDUCE_HPP__
#ifndef __OPENCV_CUDEV_GRID_REDUCE_HPP__
#define __OPENCV_CUDEV_GRID_REDUCE_HPP__
#include <limits>
#include "../common.hpp"
@@ -52,13 +52,18 @@
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
#include "../ptr2d/transform.hpp"
#include "detail/glob_reduce.hpp"
#include "detail/reduce.hpp"
#include "detail/minmaxloc.hpp"
namespace cv { namespace cudev {
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
CV_StaticAssert( unsigned(VecTraits<src_type>::cn) == unsigned(VecTraits<ResType>::cn), "" );
dst.create(1, 1);
dst.setTo(0, stream);
@@ -67,27 +72,31 @@ __host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskP
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_glob_reduce_detail::sum<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::sum<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
{
typedef typename PtrTraits<SrcPtr>::value_type src_type;
CV_StaticAssert( unsigned(VecTraits<src_type>::cn) == unsigned(VecTraits<ResType>::cn), "" );
dst.create(1, 1);
dst.setTo(0, stream);
const int rows = getRows(src);
const int cols = getCols(src);
grid_glob_reduce_detail::sum<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::sum<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
@@ -101,11 +110,11 @@ __host__ void gridFindMinVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const Ma
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_glob_reduce_detail::minVal<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::minVal<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType>
@@ -117,11 +126,11 @@ __host__ void gridFindMinVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream&
const int rows = getRows(src);
const int cols = getCols(src);
grid_glob_reduce_detail::minVal<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::minVal<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
@@ -135,11 +144,11 @@ __host__ void gridFindMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const Ma
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_glob_reduce_detail::maxVal<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::maxVal<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType>
@@ -151,11 +160,11 @@ __host__ void gridFindMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream&
const int rows = getRows(src);
const int cols = getCols(src);
grid_glob_reduce_detail::maxVal<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::maxVal<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
@@ -170,11 +179,11 @@ __host__ void gridFindMinMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, const
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_glob_reduce_detail::minMaxVal<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::minMaxVal<Policy>(shrinkPtr(src),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType>
@@ -187,11 +196,51 @@ __host__ void gridFindMinMaxVal_(const SrcPtr& src, GpuMat_<ResType>& dst, Strea
const int rows = getRows(src);
const int cols = getCols(src);
grid_glob_reduce_detail::minMaxVal<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::minMaxVal<Policy>(shrinkPtr(src),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridMinMaxLoc_(const SrcPtr& src, GpuMat_<ResType>& valBuf, GpuMat_<int>& locBuf, const MaskPtr& mask, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
dim3 grid, block;
grid_minmaxloc_detail::getLaunchCfg<Policy>(rows, cols, block, grid);
valBuf.create(2, grid.x * grid.y);
locBuf.create(2, grid.x * grid.y);
grid_minmaxloc_detail::minMaxLoc<Policy>(shrinkPtr(src),
valBuf[0], valBuf[1], locBuf[0], locBuf[1],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType>
__host__ void gridMinMaxLoc_(const SrcPtr& src, GpuMat_<ResType>& valBuf, GpuMat_<int>& locBuf, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
dim3 grid, block;
grid_minmaxloc_detail::getLaunchCfg<Policy>(rows, cols, block, grid);
valBuf.create(2, grid.x * grid.y);
locBuf.create(2, grid.x * grid.y);
grid_minmaxloc_detail::minMaxLoc<Policy>(shrinkPtr(src),
valBuf[0], valBuf[1], locBuf[0], locBuf[1],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
@@ -209,11 +258,11 @@ __host__ void gridCountNonZero_(const SrcPtr& src, GpuMat_<ResType>& dst, const
not_equal_to<src_type> ne_op;
const src_type zero = VecTraits<src_type>::all(0);
grid_glob_reduce_detail::sum<Policy>(shrinkPtr(transformPtr(src, bind2nd(ne_op, zero))),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::sum<Policy>(shrinkPtr(transformPtr(src, bind2nd(ne_op, zero))),
dst[0],
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename ResType>
@@ -229,11 +278,11 @@ __host__ void gridCountNonZero_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream
not_equal_to<src_type> ne_op;
const src_type zero = VecTraits<src_type>::all(0);
grid_glob_reduce_detail::sum<Policy>(shrinkPtr(transformPtr(src, bind2nd(ne_op, zero))),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
grid_reduce_detail::sum<Policy>(shrinkPtr(transformPtr(src, bind2nd(ne_op, zero))),
dst[0],
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
// default policy
@@ -297,6 +346,18 @@ __host__ void gridFindMinMaxVal(const SrcPtr& src, GpuMat_<ResType>& dst, Stream
gridFindMinMaxVal_<DefaultGlobReducePolicy>(src, dst, stream);
}
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridMinMaxLoc(const SrcPtr& src, GpuMat_<ResType>& valBuf, GpuMat_<int>& locBuf, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridMinMaxLoc_<DefaultGlobReducePolicy>(src, valBuf, locBuf, mask, stream);
}
template <class SrcPtr, typename ResType>
__host__ void gridMinMaxLoc(const SrcPtr& src, GpuMat_<ResType>& valBuf, GpuMat_<int>& locBuf, Stream& stream = Stream::Null())
{
gridMinMaxLoc_<DefaultGlobReducePolicy>(src, valBuf, locBuf, stream);
}
template <class SrcPtr, typename ResType, class MaskPtr>
__host__ void gridCountNonZero(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{

View File

@@ -49,6 +49,7 @@
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "../util/limits.hpp"
#include "../util/saturate_cast.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
@@ -62,6 +63,11 @@ template <typename T> struct Sum : plus<T>
{
typedef T work_type;
template <typename U> struct rebind
{
typedef Sum<U> other;
};
__device__ __forceinline__ static T initialValue()
{
return VecTraits<T>::all(0);
@@ -77,14 +83,19 @@ template <typename T> struct Avg : plus<T>
{
typedef T work_type;
template <typename U> struct rebind
{
typedef Avg<U> other;
};
__device__ __forceinline__ static T initialValue()
{
return VecTraits<T>::all(0);
}
__device__ __forceinline__ static T result(T r, int sz)
__device__ __forceinline__ static T result(T r, float sz)
{
return r / sz;
return saturate_cast<T>(r / sz);
}
};
@@ -92,6 +103,11 @@ template <typename T> struct Min : minimum<T>
{
typedef T work_type;
template <typename U> struct rebind
{
typedef Min<U> other;
};
__device__ __forceinline__ static T initialValue()
{
return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max());
@@ -107,6 +123,11 @@ template <typename T> struct Max : maximum<T>
{
typedef T work_type;
template <typename U> struct rebind
{
typedef Max<U> other;
};
__device__ __forceinline__ static T initialValue()
{
return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max());
@@ -158,7 +179,7 @@ __host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, cons
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
createContinuous(rows, 1, DataType<ResType>::type, dst);
dst.create(1, rows);
grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(shrinkPtr(src),
dst[0],
@@ -173,7 +194,7 @@ __host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, Stre
const int rows = getRows(src);
const int cols = getCols(src);
createContinuous(rows, 1, DataType<ResType>::type, dst);
dst.create(1, rows);
grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(shrinkPtr(src),
dst[0],

View File

@@ -51,6 +51,7 @@
#include "../util/vec_traits.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/glob.hpp"
#include "../ptr2d/mask.hpp"
#include "detail/split_merge.hpp"
@@ -75,6 +76,24 @@ __host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const Ma
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtrTuple, typename DstType, class MaskPtr>
__host__ void gridMerge_(const SrcPtrTuple& src, const GlobPtrSz<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<DstType>::cn == tuple_size<SrcPtrTuple>::value, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_split_merge_detail::MergeImpl<VecTraits<DstType>::cn, Policy>::merge(shrinkPtr(src),
shrinkPtr(dst),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtrTuple, typename DstType>
__host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
@@ -92,6 +111,23 @@ __host__ void gridMerge_(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream&
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtrTuple, typename DstType>
__host__ void gridMerge_(const SrcPtrTuple& src, const GlobPtrSz<DstType>& dst, Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<DstType>::cn == tuple_size<SrcPtrTuple>::value, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
grid_split_merge_detail::MergeImpl<VecTraits<DstType>::cn, Policy>::merge(shrinkPtr(src),
shrinkPtr(dst),
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@@ -132,6 +168,25 @@ __host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[2], const Ma
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[2], const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst[0]) == rows && getCols(dst[0]) == cols );
CV_Assert( getRows(dst[1]) == rows && getCols(dst[1]) == cols );
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
@@ -168,6 +223,24 @@ __host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[2], Stream&
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[2], Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 2, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst[0]) == rows && getCols(dst[0]) == cols );
CV_Assert( getRows(dst[1]) == rows && getCols(dst[1]) == cols );
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]),
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@@ -210,6 +283,26 @@ __host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[3], const Ma
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[3], const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst[0]) == rows && getCols(dst[0]) == cols );
CV_Assert( getRows(dst[1]) == rows && getCols(dst[1]) == cols );
CV_Assert( getRows(dst[2]) == rows && getCols(dst[2]) == cols );
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, Stream& stream = Stream::Null())
{
@@ -248,6 +341,25 @@ __host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[3], Stream&
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[3], Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 3, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst[0]) == rows && getCols(dst[0]) == cols );
CV_Assert( getRows(dst[1]) == rows && getCols(dst[1]) == cols );
CV_Assert( getRows(dst[2]) == rows && getCols(dst[2]) == cols );
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]),
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@@ -283,10 +395,31 @@ __host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[4], const Ma
dst[0].create(rows, cols);
dst[1].create(rows, cols);
dst[2].create(rows, cols);
dst[4].create(rows, cols);
dst[3].create(rows, cols);
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[4]),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[3]),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit_(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[4], const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst[0]) == rows && getCols(dst[0]) == cols );
CV_Assert( getRows(dst[1]) == rows && getCols(dst[1]) == cols );
CV_Assert( getRows(dst[2]) == rows && getCols(dst[2]) == cols );
CV_Assert( getRows(dst[3]) == rows && getCols(dst[3]) == cols );
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[3]),
shrinkPtr(mask),
rows, cols,
StreamAccessor::getStream(stream));
@@ -323,10 +456,30 @@ __host__ void gridSplit_(const SrcPtr& src, GpuMat_<DstType> (&dst)[4], Stream&
dst[0].create(rows, cols);
dst[1].create(rows, cols);
dst[2].create(rows, cols);
dst[4].create(rows, cols);
dst[3].create(rows, cols);
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[4]),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[3]),
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridSplit_(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[4], Stream& stream = Stream::Null())
{
CV_StaticAssert( VecTraits<typename PtrTraits<SrcPtr>::value_type>::cn == 4, "" );
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst[0]) == rows && getCols(dst[0]) == cols );
CV_Assert( getRows(dst[1]) == rows && getCols(dst[1]) == cols );
CV_Assert( getRows(dst[2]) == rows && getCols(dst[2]) == cols );
CV_Assert( getRows(dst[3]) == rows && getCols(dst[3]) == cols );
grid_split_merge_detail::split<Policy>(shrinkPtr(src),
shrinkPtr(dst[0]), shrinkPtr(dst[1]), shrinkPtr(dst[2]), shrinkPtr(dst[3]),
WithOutMask(),
rows, cols,
StreamAccessor::getStream(stream));
@@ -348,12 +501,24 @@ __host__ void gridMerge(const SrcPtrTuple& src, GpuMat_<DstType>& dst, const Mas
gridMerge_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
template <class SrcPtrTuple, typename DstType, class MaskPtr>
__host__ void gridMerge(const SrcPtrTuple& src, const GlobPtrSz<DstType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridMerge_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
template <class SrcPtrTuple, typename DstType>
__host__ void gridMerge(const SrcPtrTuple& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
gridMerge_<DefaultSplitMergePolicy>(src, dst, stream);
}
template <class SrcPtrTuple, typename DstType>
__host__ void gridMerge(const SrcPtrTuple& src, const GlobPtrSz<DstType>& dst, Stream& stream = Stream::Null())
{
gridMerge_<DefaultSplitMergePolicy>(src, dst, stream);
}
template <class SrcPtr, typename DstType, class MaskPtr>
__host__ void gridSplit(const SrcPtr& src, const tuple< GpuMat_<DstType>&, GpuMat_<DstType>& >& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
{
@@ -396,12 +561,24 @@ __host__ void gridSplit(const SrcPtr& src, GpuMat_<DstType> (&dst)[COUNT], const
gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
template <class SrcPtr, typename DstType, int COUNT, class MaskPtr>
__host__ void gridSplit(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[COUNT], const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, mask, stream);
}
template <class SrcPtr, typename DstType, int COUNT>
__host__ void gridSplit(const SrcPtr& src, GpuMat_<DstType> (&dst)[COUNT], Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
template <class SrcPtr, typename DstType, int COUNT>
__host__ void gridSplit(const SrcPtr& src, GlobPtrSz<DstType> (&dst)[COUNT], Stream& stream = Stream::Null())
{
gridSplit_<DefaultSplitMergePolicy>(src, dst, stream);
}
}}
#endif

View File

@@ -58,7 +58,7 @@
namespace cv { namespace cudev {
template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformUnary_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
@@ -67,11 +67,11 @@ __host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnO
dst.create(rows, cols);
grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_unary<Policy>(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformUnary_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const UnOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
@@ -79,33 +79,33 @@ __host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, c
CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_unary<Policy>(shrinkPtr(src), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class UnOp>
__host__ void gridTransform_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
__host__ void gridTransformUnary_(const SrcPtr& src, GpuMat_<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
dst.create(rows, cols);
grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_unary<Policy>(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType, class UnOp>
__host__ void gridTransform_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
__host__ void gridTransformUnary_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const UnOp& op, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
grid_transform_detail::transform<Policy>(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_unary<Policy>(shrinkPtr(src), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformBinary_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
const int rows = getRows(src1);
const int cols = getCols(src1);
@@ -115,11 +115,11 @@ __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<D
dst.create(rows, cols);
grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_binary<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformBinary_(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz<DstType>& dst, const BinOp& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
const int rows = getRows(src1);
const int cols = getCols(src1);
@@ -128,11 +128,11 @@ __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, const Glo
CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_binary<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, shrinkPtr(mask), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp>
__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
__host__ void gridTransformBinary_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
{
const int rows = getRows(src1);
const int cols = getCols(src1);
@@ -141,11 +141,11 @@ __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<D
dst.create(rows, cols);
grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_binary<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp>
__host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GlobPtrSz<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
__host__ void gridTransformBinary_(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz<DstType>& dst, const BinOp& op, Stream& stream = Stream::Null())
{
const int rows = getRows(src1);
const int cols = getCols(src1);
@@ -153,11 +153,11 @@ __host__ void gridTransform_(const SrcPtr1& src1, const SrcPtr2& src2, GlobPtrSz
CV_Assert( getRows(dst) == rows && getCols(dst) == cols );
CV_Assert( getRows(src2) == rows && getCols(src2) == cols );
grid_transform_detail::transform<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
grid_transform_detail::transform_binary<Policy>(shrinkPtr(src1), shrinkPtr(src2), shrinkPtr(dst), op, WithOutMask(), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
@@ -178,7 +178,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMa
}
template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
@@ -198,7 +198,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, Glob
}
template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
@@ -217,7 +217,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMa
}
template <class Policy, class SrcPtr, typename D0, typename D1, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 2, "" );
@@ -236,7 +236,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, Glob
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
@@ -258,7 +258,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMa
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
@@ -279,7 +279,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, Glob
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
@@ -299,7 +299,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMa
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 3, "" );
@@ -319,7 +319,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, Glob
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
@@ -342,7 +342,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMa
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
@@ -364,7 +364,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, Glob
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
@@ -385,7 +385,7 @@ __host__ void gridTransform_(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMa
}
template <class Policy, class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
__host__ void gridTransform_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple_(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
CV_StaticAssert( tuple_size<OpTuple>::value == 4, "" );
@@ -417,123 +417,123 @@ struct DefaultTransformPolicy
};
template <class SrcPtr, typename DstType, class Op, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformUnary(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformUnary_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename DstType, class Op, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformUnary(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformUnary_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename DstType, class Op>
__host__ void gridTransform(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
__host__ void gridTransformUnary(const SrcPtr& src, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformUnary_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr, typename DstType, class Op>
__host__ void gridTransform(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
__host__ void gridTransformUnary(const SrcPtr& src, const GlobPtrSz<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformUnary_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr1, class SrcPtr2, typename DstType, class Op, class MaskPtr>
__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformBinary(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
gridTransformBinary_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
}
template <class SrcPtr1, class SrcPtr2, typename DstType, class Op, class MaskPtr>
__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, const GlobPtrSz<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformBinary(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz<DstType>& dst, const Op& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
gridTransformBinary_<DefaultTransformPolicy>(src1, src2, dst, op, mask, stream);
}
template <class SrcPtr1, class SrcPtr2, typename DstType, class Op>
__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
__host__ void gridTransformBinary(const SrcPtr1& src1, const SrcPtr2& src2, GpuMat_<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
gridTransformBinary_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
}
template <class SrcPtr1, class SrcPtr2, typename DstType, class Op>
__host__ void gridTransform(const SrcPtr1& src1, const SrcPtr1& src2, const GlobPtrSz<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
__host__ void gridTransformBinary(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtrSz<DstType>& dst, const Op& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
gridTransformBinary_<DefaultTransformPolicy>(src1, src2, dst, op, stream);
}
template <class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename D0, typename D1, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename D0, typename D1, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr, typename D0, typename D1, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple, class MaskPtr>
__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, const MaskPtr& mask, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, mask, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, mask, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GpuMat_<D0>&, GpuMat_<D1>&, GpuMat_<D2>&, GpuMat_<D3>& >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
template <class SrcPtr, typename D0, typename D1, typename D2, typename D3, class OpTuple>
__host__ void gridTransform(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
__host__ void gridTransformTuple(const SrcPtr& src, const tuple< GlobPtrSz<D0>, GlobPtrSz<D1>, GlobPtrSz<D2>, GlobPtrSz<D3> >& dst, const OpTuple& op, Stream& stream = Stream::Null())
{
gridTransform_<DefaultTransformPolicy>(src, dst, op, stream);
gridTransformTuple_<DefaultTransformPolicy>(src, dst, op, stream);
}
}}

View File

@@ -49,19 +49,53 @@
#include "../common.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/glob.hpp"
#include "detail/transpose.hpp"
namespace cv { namespace cudev {
template <class SrcPtr, typename DstType>
__host__ void gridTranspose(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridTranspose_(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
dst.create(cols, rows);
transpose_detail::transpose(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream));
transpose_detail::transpose<Policy>(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream));
}
template <class Policy, class SrcPtr, typename DstType>
__host__ void gridTranspose_(const SrcPtr& src, const GlobPtrSz<DstType>& dst, Stream& stream = Stream::Null())
{
const int rows = getRows(src);
const int cols = getCols(src);
CV_Assert( getRows(dst) == cols && getCols(dst) == rows );
transpose_detail::transpose<Policy>(shrinkPtr(src), shrinkPtr(dst), rows, cols, StreamAccessor::getStream(stream));
}
// Default Policy
struct DefaultTransposePolicy
{
enum {
tile_dim = 16,
block_dim_y = 16
};
};
template <class SrcPtr, typename DstType>
__host__ void gridTranspose(const SrcPtr& src, GpuMat_<DstType>& dst, Stream& stream = Stream::Null())
{
gridTranspose_<DefaultTransposePolicy>(src, dst, stream);
}
template <class SrcPtr, typename DstType>
__host__ void gridTranspose(const SrcPtr& src, const GlobPtrSz<DstType>& dst, Stream& stream = Stream::Null())
{
gridTranspose_<DefaultTransposePolicy>(src, dst, stream);
}
}}

View File

@@ -47,6 +47,7 @@
#define __OPENCV_CUDEV_PTR2D_LUT_HPP__
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "../grid/copy.hpp"
#include "traits.hpp"
#include "gpumat.hpp"
@@ -63,7 +64,8 @@ template <class SrcPtr, class TablePtr> struct LutPtr
__device__ __forceinline__ typename PtrTraits<TablePtr>::value_type operator ()(typename PtrTraits<SrcPtr>::index_type y, typename PtrTraits<SrcPtr>::index_type x) const
{
return tbl(0, src(y, x));
typedef typename PtrTraits<TablePtr>::index_type tbl_index_type;
return tbl(VecTraits<tbl_index_type>::all(0), src(y, x));
}
};
@@ -81,8 +83,6 @@ template <class SrcPtr, class TablePtr> struct LutPtrSz : LutPtr<SrcPtr, TablePt
template <class SrcPtr, class TablePtr>
__host__ LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> lutPtr(const SrcPtr& src, const TablePtr& tbl)
{
CV_Assert( getRows(tbl) == 1 );
LutPtrSz<typename PtrTraits<SrcPtr>::ptr_type, typename PtrTraits<TablePtr>::ptr_type> ptr;
ptr.src = shrinkPtr(src);
ptr.tbl = shrinkPtr(tbl);

View File

@@ -62,6 +62,42 @@ struct WithOutMask
}
};
template <class MaskPtr> struct SingleMaskChannels
{
typedef typename PtrTraits<MaskPtr>::value_type value_type;
typedef typename PtrTraits<MaskPtr>::index_type index_type;
MaskPtr mask;
int channels;
__device__ __forceinline__ value_type operator()(index_type y, index_type x) const
{
return mask(y, x / channels);
}
};
template <class MaskPtr> struct SingleMaskChannelsSz : SingleMaskChannels<MaskPtr>
{
int rows, cols;
};
template <class MaskPtr>
__host__ SingleMaskChannelsSz<typename PtrTraits<MaskPtr>::ptr_type>
singleMaskChannels(const MaskPtr& mask, int channels)
{
SingleMaskChannelsSz<typename PtrTraits<MaskPtr>::ptr_type> ptr;
ptr.mask = shrinkPtr(mask);
ptr.channels = channels;
ptr.rows = getRows(mask);
ptr.cols = getCols(mask) * channels;
return ptr;
}
template <class MaskPtr> struct PtrTraits< SingleMaskChannelsSz<MaskPtr> > : PtrTraitsBase<SingleMaskChannelsSz<MaskPtr>, SingleMaskChannels<MaskPtr> >
{
};
}}
#endif

View File

@@ -194,10 +194,23 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
}
namespace vec_math_detail
{
__device__ __forceinline__ schar abs_(schar val)
{
return (schar) ::abs((int) val);
}
__device__ __forceinline__ short abs_(short val)
{
return (short) ::abs((int) val);
}
}
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, vec_math_detail::abs_, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, vec_math_detail::abs_, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)

View File

@@ -70,7 +70,7 @@ CV_CUDEV_MAKE_VEC_INST(double)
#undef CV_CUDEV_MAKE_VEC_INST
template<> struct MakeVec<schar, 1> { typedef char type; };
template<> struct MakeVec<schar, 1> { typedef schar type; };
template<> struct MakeVec<schar, 2> { typedef char2 type; };
template<> struct MakeVec<schar, 3> { typedef char3 type; };
template<> struct MakeVec<schar, 4> { typedef char4 type; };