added gpu threshold.

Vladislav Vinogradov 2011-01-24 10:11:02 +00:00
parent 4c4ff882ad
commit 8abdb3721f
17 changed files with 768 additions and 370 deletions

View File

@@ -314,13 +314,17 @@ See also: \cvCppCross{cvtColor}.
Applies a fixed-level threshold to each array element.
\cvdefCpp{
double threshold(const GpuMat\& src, GpuMat\& dst, double thresh);
double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double maxval, int type);\newline
double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double maxval, int type, const Stream\& stream);
}
\begin{description}
\cvarg{src}{Source array. Supports only \texttt{CV\_32FC1} type.}
\cvarg{src}{Source array (single-channel; \texttt{CV\_64F} depth is not supported).}
\cvarg{dst}{Destination array; will have the same size and the same type as \texttt{src}.}
\cvarg{thresh}{Threshold value.}
\cvarg{maxval}{Maximum value to use with \texttt{THRESH\_BINARY} and \texttt{THRESH\_BINARY\_INV} thresholding types.}
\cvarg{type}{Thresholding type. For details, see \cvCppCross{threshold}. The \texttt{THRESH\_OTSU} thresholding type is not supported.}
\cvarg{stream}{Stream for the asynchronous version.}
\end{description}
See also: \cvCppCross{threshold}.
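
A minimal host-side usage sketch of the new API (illustrative only, not part of this commit; the file name is hypothetical):

#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/gpu/gpu.hpp>

void example()
{
    cv::Mat src = cv::imread("input.png", 0);    // single-channel 8-bit input
    cv::gpu::GpuMat d_src(src), d_dst;           // constructor uploads to the device

    // Same semantics as cv::threshold, computed on the GPU.
    cv::gpu::threshold(d_src, d_dst, 128.0, 255.0, cv::THRESH_BINARY);

    cv::Mat dst;
    d_dst.download(dst);                         // copy the result back to the host
}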

View File

@@ -587,9 +587,10 @@ namespace cv
//! async version
CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const Stream& stream);
//! applies fixed threshold to the image.
//! Supports only the THRESH_TRUNC threshold type and single-channel float sources.
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh);
//! applies fixed threshold to the image
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type);
//! async version
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, const Stream& stream);
//! resizes the image
//! Supports INTER_NEAREST, INTER_LINEAR

View File

@@ -41,7 +41,8 @@
//M*/
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
@@ -468,4 +469,112 @@ namespace cv { namespace gpu { namespace mathfunc
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
// threshold
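// Each mode below is a small functor over one source element; the shared
// transform() framework (opencv2/gpu/device/transform.hpp) supplies the grid
// launch, bounds checks and vectorized memory access, so only the per-pixel
// rule differs between the five thresholding types.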
class ThreshOp
{
public:
ThreshOp(float thresh_, float maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
protected:
float thresh;
float maxVal;
};
class ThreshBinary : public ThreshOp
{
public:
ThreshBinary(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? saturate_cast<T>(maxVal) : 0;
}
};
class ThreshBinaryInv : public ThreshOp
{
public:
ThreshBinaryInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? 0 : saturate_cast<T>(maxVal);
}
};
class ThreshTrunc : public ThreshOp
{
public:
ThreshTrunc(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return saturate_cast<T>(fmin((float)src, thresh));
}
};
class ThreshToZero : public ThreshOp
{
public:
ThreshToZero(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? src : 0;
}
};
class ThreshToZeroInv : public ThreshOp
{
public:
ThreshToZeroInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
template<typename T>
__device__ T operator()(const T& src) const
{
return (float)src > thresh ? 0 : src;
}
};
template <class Op, typename T>
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
cudaStream_t stream)
{
Op op(thresh, maxVal);
transform(src, dst, op, stream);
}
template <typename T>
void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
cudaStream_t stream);
static const caller_t callers[] =
{
threshold_caller<ThreshBinary, T>,
threshold_caller<ThreshBinaryInv, T>,
threshold_caller<ThreshTrunc, T>,
threshold_caller<ThreshToZero, T>,
threshold_caller<ThreshToZeroInv, T>
};
callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
}
template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
}}}
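
For reference, callers[] above is indexed in OpenCV enum order (THRESH_BINARY = 0 through THRESH_TOZERO_INV = 4). A host-side sketch of the same per-pixel rules (illustrative only, not part of the commit):

static float threshold_ref(float src, float thresh, float maxVal, int type)
{
    switch (type)
    {
    case 0:  /* THRESH_BINARY     */ return src > thresh ? maxVal : 0.0f;
    case 1:  /* THRESH_BINARY_INV */ return src > thresh ? 0.0f : maxVal;
    case 2:  /* THRESH_TRUNC      */ return src > thresh ? thresh : src;
    case 3:  /* THRESH_TOZERO     */ return src > thresh ? src : 0.0f;
    default: /* THRESH_TOZERO_INV */ return src > thresh ? 0.0f : src;
    }
}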

View File

@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;

View File

@@ -42,6 +42,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/transform.hpp"
using namespace cv::gpu::device;
@@ -55,63 +56,6 @@ namespace cv { namespace gpu { namespace matrix_operations {
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
struct ReadWriteTraits
{
enum {shift=1};
typedef T read_type;
typedef DT write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 1, 1>
{
enum {shift=4};
typedef char4 read_type;
typedef char4 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 2, 1>
{
enum {shift=4};
typedef short4 read_type;
typedef char4 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 4, 1>
{
enum {shift=4};
typedef int4 read_type;
typedef char4 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 1, 2>
{
enum {shift=2};
typedef char2 read_type;
typedef short2 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 2, 2>
{
enum {shift=2};
typedef short2 read_type;
typedef short2 write_type;
};
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 4, 2>
{
enum {shift=2};
typedef int2 read_type;
typedef short2 write_type;
};
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
@@ -276,88 +220,64 @@ namespace cv { namespace gpu { namespace matrix_operations {
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template <typename T, typename DT>
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
template <typename T, typename D>
class Convertor
{
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
public:
Convertor(double alpha_, double beta_): alpha(alpha_), beta(beta_) {}
const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
if (y < height)
__device__ D operator()(const T& src)
{
const T* src = (const T*)(srcmat + src_step * y);
DT* dst = (DT*)(dstmat + dst_step * y);
if ((x * shift) + shift - 1 < width)
{
read_type srcn_el = ((read_type*)src)[x];
write_type dstn_el;
const T* src1_el = (const T*) &srcn_el;
DT* dst1_el = (DT*) &dstn_el;
for (int i = 0; i < shift; ++i)
dst1_el[i] = saturate_cast<DT>(alpha * src1_el[i] + beta);
((write_type*)dst)[x] = dstn_el;
}
else
{
for (int i = 0; i < shift - 1; ++i)
if ((x * shift) + i < width)
dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
}
return saturate_cast<D>(alpha * src + beta);
}
}
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
template<typename T, typename DT>
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
private:
double alpha, beta;
};
template<typename T, typename D>
void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
{
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
dim3 block(32, 8);
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
Convertor<T, D> op(alpha, beta);
transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
}
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta,
cudaStream_t stream = 0)
{
static CvtFunc tab[8][8] =
{
{cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta,
cudaStream_t stream);
{cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
{cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
{cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
{cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
{cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
{cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
{0,0,0,0,0,0,0,0}
static const caller_t tab[8][8] =
{
{cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
{cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
{cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
{cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
{cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
{cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
{cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
{0,0,0,0,0,0,0,0}
};
CvtFunc func = tab[sdepth][ddepth];
if (func == 0)
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
caller_t func = tab[sdepth][ddepth];
if (!func)
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
func(src, dst, alpha, beta, stream);
}
}}}
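
The net effect of this rewrite is that convert_gpu applies saturate_cast<D>(alpha * src + beta) element-wise through the shared transform framework instead of the hand-vectorized kernel it replaces. A usage sketch through the public API (illustrative only):

cv::gpu::GpuMat d_src;   // e.g. CV_8UC1, filled elsewhere
cv::gpu::GpuMat d_dst;

// Routes through convert_gpu: dst = saturate_cast<float>(src * (1.0/255.0) + 0.0)
d_src.convertTo(d_dst, CV_32F, 1.0 / 255.0, 0.0);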

View File

@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;

View File

@@ -46,7 +46,6 @@
//M*/
#include "internal_shared.hpp"
#include "surf_key_point.h"
#include "opencv2/gpu/device/limits_gpu.hpp"
using namespace cv::gpu;

View File

@@ -1,54 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_SURF_KEY_POINT_H__
#define __OPENCV_SURF_KEY_POINT_H__
namespace cv
{
namespace gpu
{
}
}
#endif // __OPENCV_SURF_KEY_POINT_H__

View File

@@ -1,130 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
//! Mask accessor
template<class T> struct MaskReader_
{
PtrStep_<T> mask;
explicit MaskReader_(PtrStep_<T> mask): mask(mask) {}
__device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
};
//! Stub mask accessor
struct NoMask
{
__device__ bool operator()(int y, int x) const { return true; }
};
//! Transform kernels
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
T src_data = src.ptr(y)[x];
dst.ptr(y)[x] = op(src_data);
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
T1 src1_data = src1.ptr(y)[x];
T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
}}}
namespace cv
{
namespace gpu
{
template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src1.cols, threads.x);
grid.y = divUp(src1.rows, threads.y);
device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__

View File

@@ -80,7 +80,7 @@ namespace cv
void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
}
}
}
@@ -204,7 +204,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc = &(temp = src);
dst.create( src.size(), rtype );
matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta, impl->stream);
matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
}
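
Note the reshape(1) calls: a multi-channel matrix is re-viewed as single-channel with the channels folded into extra columns, so the depth-by-depth conversion table needs no per-channel entries. A sketch of the equivalence (illustrative only):

cv::gpu::GpuMat rgb;                    // H x W, CV_8UC3
cv::gpu::GpuMat flat = rgb.reshape(1);  // H x 3*W, CV_8UC1, same device buffer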

View File

@@ -74,6 +74,8 @@ void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int) {throw_nogpu(); return 0.0;}
double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, const Stream&) {throw_nogpu(); return 0.0;}
#else
@@ -696,4 +698,72 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& st
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
}
////////////////////////////////////////////////////////////////////////
// threshold
namespace cv { namespace gpu { namespace mathfunc
{
template <typename T>
void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
cudaStream_t stream);
}}}
namespace
{
void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type,
cudaStream_t stream = 0)
{
using namespace cv::gpu::mathfunc;
typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
cudaStream_t stream);
static const caller_t callers[] =
{
threshold_gpu<unsigned char>, threshold_gpu<signed char>,
threshold_gpu<unsigned short>, threshold_gpu<short>, threshold_gpu<int>, threshold_gpu<float>, 0
};
CV_Assert(src.channels() == 1 && src.depth() < CV_64F);
CV_Assert(type <= THRESH_TOZERO_INV);
dst.create(src.size(), src.type());
if (src.depth() != CV_32F)
{
thresh = cvFloor(thresh);
maxVal = cvRound(maxVal);
}
callers[src.depth()](src, dst, static_cast<float>(thresh), static_cast<float>(maxVal), type, stream);
}
}
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type)
{
if (src.type() == CV_32FC1 && type == THRESH_TRUNC)
{
dst.create(src.size(), src.type());
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
}
else
{
threshold_caller(src, dst, thresh, maxVal, type);
}
return thresh;
}
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, const Stream& stream)
{
threshold_caller(src, dst, thresh, maxVal, type, StreamAccessor::getStream(stream));
return thresh;
}
#endif
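
A sketch of the asynchronous overload (illustrative only, not part of the commit): work is queued on the stream, and the result is valid only after synchronization.

cv::Mat src;                  // host image, e.g. CV_8UC1
cv::gpu::Stream stream;
cv::gpu::GpuMat d_src, d_dst;

stream.enqueueUpload(src, d_src);
cv::gpu::threshold(d_src, d_dst, 100.0, 255.0, cv::THRESH_BINARY, stream);
stream.waitForCompletion();   // d_dst is ready only after this returns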

View File

@@ -55,7 +55,6 @@ void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, const Stream&) { throw_nogpu(); }
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&) { throw_nogpu(); }
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, const Stream&) { throw_nogpu(); }
double cv::gpu::threshold(const GpuMat&, GpuMat&, double) { throw_nogpu(); return 0.0; }
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int) { throw_nogpu(); }
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&) { throw_nogpu(); }
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
@@ -241,25 +240,6 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));
}
////////////////////////////////////////////////////////////////////////
// threshold
double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
{
CV_Assert(src.type() == CV_32FC1);
dst.create( src.size(), src.type() );
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
return thresh;
}
////////////////////////////////////////////////////////////////////////
// resize

View File

@@ -90,7 +90,7 @@ namespace cv
void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
}
}
}
@@ -193,7 +193,7 @@ namespace
void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
{
matrix_operations::convert_to(src, src.depth(), dst, dst.depth(), src.channels(), 1.0, 0.0);
matrix_operations::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
}
}
@@ -222,7 +222,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
dst.create( size(), rtype );
if (!noScale)
matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);
matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta);
else
{
typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst);

View File

@@ -0,0 +1,433 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "internal_shared.hpp"
#include "vecmath.hpp"
namespace cv { namespace gpu { namespace device
{
//! Mask accessor
class MaskReader
{
public:
explicit MaskReader(const PtrStep& mask_): mask(mask_) {}
__device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
private:
PtrStep mask;
};
struct NoMask
{
__device__ bool operator()(int y, int x) const { return true; }
};
//! Read Write Traits
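// 'shift' below is the number of consecutive elements one thread handles:
// 4 when the destination element is 1 byte wide, 2 when it is 2 bytes,
// and 1 otherwise, so each vectorized store stays a single 4-byte transaction.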
template <size_t src_elem_size, size_t dst_elem_size>
struct UnReadWriteTraits_
{
enum {shift=1};
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 1>
{
enum {shift=4};
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 2>
{
enum {shift=2};
};
template <typename T, typename D> struct UnReadWriteTraits
{
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
typedef typename TypeVec<T, shift>::vec_t read_type;
typedef typename TypeVec<D, shift>::vec_t write_type;
};
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
struct BinReadWriteTraits_
{
enum {shift=1};
};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
{
enum {shift=4};
};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
{
enum {shift=2};
};
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
{
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
typedef typename TypeVec<T1, shift>::vec_t read_type1;
typedef typename TypeVec<T2, shift>::vec_t read_type2;
typedef typename TypeVec<D , shift>::vec_t write_type;
};
//! Transform kernels
template <int shift> struct OpUnroller;
template <> struct OpUnroller<1>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
}
};
template <> struct OpUnroller<2>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
}
};
template <> struct OpUnroller<3>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
};
template <> struct OpUnroller<4>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
{
typedef typename UnReadWriteTraits<T, D>::read_type read_type;
typedef typename UnReadWriteTraits<T, D>::write_type write_type;
const int shift = UnReadWriteTraits<T, D>::shift;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + shift - 1 < src_.cols)
{
read_type src_n_el = ((const read_type*)src)[x];
write_type dst_n_el;
OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
dst.ptr(y)[x] = op(src.ptr(y)[x]);
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
const Mask mask, BinOp op)
{
typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
const int shift = BinReadWriteTraits<T1, T2, D>::shift;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + shift - 1 < src1_.cols)
{
read_type1 src1_n_el = ((const read_type1*)src1)[x];
read_type2 src2_n_el = ((const read_type2*)src2)[x];
write_type dst_n_el;
OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst,
const Mask mask, BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
T1 src1_data = src1.ptr(y)[x];
T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
}}}
namespace cv
{
namespace gpu
{
template <bool UseSmart> struct TransformChooser;
template<> struct TransformChooser<false>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
cudaStream_t stream = 0)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
device::transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, const Mask& mask, cudaStream_t stream = 0)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src1.cols, threads.x);
grid.y = divUp(src1.rows, threads.y);
device::transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
};
template<> struct TransformChooser<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
cudaStream_t stream = 0)
{
const int shift = device::UnReadWriteTraits<T, D>::shift;
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x * shift);
grid.y = divUp(src.rows, threads.y);
device::transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, const Mask& mask, cudaStream_t stream = 0)
{
const int shift = device::BinReadWriteTraits<T1, T2, D>::shift;
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src1.cols, threads.x * shift);
grid.y = divUp(src1.rows, threads.y);
device::transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
};
template <typename T, typename D, typename UnOp, typename Mask>
static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
cudaStream_t stream = 0)
{
TransformChooser<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream);
}
template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream = 0)
{
transform_caller(src, dst, op, device::NoMask(), stream);
}
template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStep& mask, UnOp op,
cudaStream_t stream = 0)
{
transform_caller(src, dst, op, device::MaskReader(mask), stream);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, const Mask& mask, cudaStream_t stream = 0)
{
TransformChooser<device::VecTraits<T1>::cn == 1 && device::VecTraits<T2>::cn == 1 && device::VecTraits<D>::cn == 1 && device::BinReadWriteTraits<T1, T2, D>::shift != 1>::call(src1, src2, dst, op, mask, stream);
}
template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, cudaStream_t stream = 0)
{
transform_caller(src1, src2, dst, op, device::NoMask(), stream);
}
template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
const PtrStep& mask, BinOp op, cudaStream_t stream = 0)
{
transform_caller(src1, src2, dst, op, device::MaskReader(mask), stream);
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__
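
A minimal sketch of a client of this header (the functor and wrapper are hypothetical, not part of the commit):

// negate.cu -- hypothetical translation unit built by nvcc
#include "opencv2/gpu/device/transform.hpp"

struct Negate8u
{
    __device__ uchar operator()(uchar v) const { return 255 - v; }
};

namespace cv { namespace gpu { namespace mathfunc
{
    void negate_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
    {
        // uchar -> uchar yields UnReadWriteTraits shift == 4, so the
        // vectorized transformSmart kernel is chosen automatically.
        transform((DevMem2D_<uchar>)src, (DevMem2D_<uchar>)dst, Negate8u(), stream);
    }
}}}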

View File

@@ -64,12 +64,16 @@ namespace cv
template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; };
template<> struct TypeVec<char, 1> { typedef char vec_t; };
template<> struct TypeVec<schar, 1> { typedef char vec_t; };
template<> struct TypeVec<char1, 1> { typedef char1 vec_t; };
template<> struct TypeVec<char, 2> { typedef char2 vec_t; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_t; };
template<> struct TypeVec<char2, 2> { typedef char2 vec_t; };
template<> struct TypeVec<char, 3> { typedef char3 vec_t; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_t; };
template<> struct TypeVec<char3, 3> { typedef char3 vec_t; };
template<> struct TypeVec<char, 4> { typedef char4 vec_t; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_t; };
template<> struct TypeVec<char4, 4> { typedef char4 vec_t; };
template<> struct TypeVec<ushort, 1> { typedef ushort vec_t; };
@@ -117,6 +121,15 @@ namespace cv
template<> struct TypeVec<float, 4> { typedef float4 vec_t; };
template<> struct TypeVec<float4, 4> { typedef float4 vec_t; };
template<> struct TypeVec<double, 1> { typedef double vec_t; };
template<> struct TypeVec<double1, 1> { typedef double1 vec_t; };
template<> struct TypeVec<double, 2> { typedef double2 vec_t; };
template<> struct TypeVec<double2, 2> { typedef double2 vec_t; };
template<> struct TypeVec<double, 3> { typedef double3 vec_t; };
template<> struct TypeVec<double3, 3> { typedef double3 vec_t; };
template<> struct TypeVec<double, 4> { typedef double4 vec_t; };
template<> struct TypeVec<double4, 4> { typedef double4 vec_t; };
template<typename T> struct VecTraits;
template<> struct VecTraits<uchar>
@@ -162,33 +175,40 @@ namespace cv
static __device__ __host__ char all(char v) {return v;}
static __device__ __host__ char make(char x) {return x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_t;
enum {cn=1};
static __device__ __host__ schar all(schar v) {return v;}
static __device__ __host__ schar make(schar x) {return x;}
};
template<> struct VecTraits<char1>
{
typedef char elem_t;
typedef schar elem_t;
enum {cn=1};
static __device__ __host__ char1 all(char v) {return make_char1(v);}
static __device__ __host__ char1 make(char x) {return make_char1(x);}
static __device__ __host__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ char1 make(schar x) {return make_char1(x);}
};
template<> struct VecTraits<char2>
{
typedef char elem_t;
typedef schar elem_t;
enum {cn=2};
static __device__ __host__ char2 all(char v) {return make_char2(v, v);}
static __device__ __host__ char2 make(char x, char y) {return make_char2(x, y);}
static __device__ __host__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ char2 make(schar x, schar y) {return make_char2(x, y);}
};
template<> struct VecTraits<char3>
{
typedef char elem_t;
typedef schar elem_t;
enum {cn=3};
static __device__ __host__ char3 all(char v) {return make_char3(v, v, v);}
static __device__ __host__ char3 make(char x, char y, char z) {return make_char3(x, y, z);}
static __device__ __host__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
};
template<> struct VecTraits<char4>
{
typedef char elem_t;
typedef schar elem_t;
enum {cn=4};
static __device__ __host__ char4 all(char v) {return make_char4(v, v, v, v);}
static __device__ __host__ char4 make(char x, char y, char z, char w) {return make_char4(x, y, z, w);}
static __device__ __host__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
};
template<> struct VecTraits<ushort>
@@ -371,6 +391,42 @@ namespace cv
static __device__ __host__ float4 make(float x, float y, float z, float w) {return make_float4(x, y, z, w);}
};
template<> struct VecTraits<double>
{
typedef double elem_t;
enum {cn=1};
static __device__ __host__ double all(double v) {return v;}
static __device__ __host__ double make(double x) {return x;}
};
template<> struct VecTraits<double1>
{
typedef double elem_t;
enum {cn=1};
static __device__ __host__ double1 all(double v) {return make_double1(v);}
static __device__ __host__ double1 make(double x) {return make_double1(x);}
};
template<> struct VecTraits<double2>
{
typedef double elem_t;
enum {cn=2};
static __device__ __host__ double2 all(double v) {return make_double2(v, v);}
static __device__ __host__ double2 make(double x, double y) {return make_double2(x, y);}
};
template<> struct VecTraits<double3>
{
typedef double elem_t;
enum {cn=3};
static __device__ __host__ double3 all(double v) {return make_double3(v, v, v);}
static __device__ __host__ double3 make(double x, double y, double z) {return make_double3(x, y, z);}
};
template<> struct VecTraits<double4>
{
typedef double elem_t;
enum {cn=4};
static __device__ __host__ double4 all(double v) {return make_double4(v, v, v, v);}
static __device__ __host__ double4 make(double x, double y, double z, double w) {return make_double4(x, y, z, w);}
};
template <int cn, typename VecD> struct SatCast;
template <typename VecD> struct SatCast<1, VecD>
{

View File

@@ -70,7 +70,6 @@
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "npp_staging.h"
#include "surf_key_point.h"
#include "nvidia/NCV.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"

View File

@@ -180,30 +180,41 @@ void CV_GpuImageProcTest::run( int )
////////////////////////////////////////////////////////////////////////////////
// threshold
struct CV_GpuNppImageThresholdTest : public CV_GpuImageProcTest
struct CV_GpuImageThresholdTest : public CV_GpuImageProcTest
{
public:
CV_GpuNppImageThresholdTest() : CV_GpuImageProcTest( "GPU-NppImageThreshold", "threshold" ) {}
CV_GpuImageThresholdTest() : CV_GpuImageProcTest( "GPU-ImageThreshold", "threshold" ) {}
int test(const Mat& img)
{
if (img.type() != CV_32FC1)
if (img.type() != CV_8UC1 && img.type() != CV_32FC1)
{
ts->printf(CvTS::LOG, "\nUnsupported type\n");
return CvTS::OK;
}
const double maxVal = img.type() == CV_8UC1 ? 255 : 1.0;
cv::RNG rng(*ts->get_rng());
const double thresh = rng;
cv::Mat cpuRes;
cv::threshold(img, cpuRes, thresh, 0.0, THRESH_TRUNC);
int res = CvTS::OK;
GpuMat gpu1(img);
GpuMat gpuRes;
cv::gpu::threshold(gpu1, gpuRes, thresh);
for (int type = THRESH_BINARY; type <= THRESH_TOZERO_INV; ++type)
{
const double thresh = rng.uniform(0.0, maxVal);
return CheckNorm(cpuRes, gpuRes);
cv::Mat cpuRes;
cv::threshold(img, cpuRes, thresh, maxVal, type);
GpuMat gpu1(img);
GpuMat gpuRes;
cv::gpu::threshold(gpu1, gpuRes, thresh, maxVal, type);
if (CheckNorm(cpuRes, gpuRes) != CvTS::OK)
res = CvTS::FAIL_GENERIC;
}
return res;
}
};
@@ -822,7 +833,7 @@ struct CV_GpuColumnSumTest: CvTest
// Placing all test definitions in one place
// makes it easy to see which tests are commented out.
CV_GpuNppImageThresholdTest CV_GpuNppImageThreshold_test;
CV_GpuImageThresholdTest CV_GpuImageThreshold_test;
CV_GpuNppImageResizeTest CV_GpuNppImageResize_test;
CV_GpuNppImageCopyMakeBorderTest CV_GpuNppImageCopyMakeBorder_test;
CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;