added gpu threshold.

This commit is contained in:
Vladislav Vinogradov
2011-01-24 10:11:02 +00:00
parent 4c4ff882ad
commit 8abdb3721f
17 changed files with 768 additions and 370 deletions

View File

@@ -41,7 +41,8 @@
//M*/
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
@@ -468,4 +469,112 @@ namespace cv { namespace gpu { namespace mathfunc
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
// threshold
// Common state for the threshold functors below: stores the threshold and the
// "maximum value" so that derived functors can read them in operator().
class ThreshOp
{
public:
    ThreshOp(float thresh_, float maxVal_)
        : thresh(thresh_)
        , maxVal(maxVal_)
    {
    }

protected:
    float thresh; // value each source element is compared against
    float maxVal; // replacement value used by the binary threshold modes
};
// Binary threshold: elements strictly above the threshold become maxVal
// (saturated to T); all other elements become 0.
class ThreshBinary : public ThreshOp
{
public:
    ThreshBinary(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}

    template<typename T>
    __device__ T operator()(const T& src) const
    {
        if ((float)src > thresh)
            return saturate_cast<T>(maxVal);

        return 0;
    }
};
// Inverted binary threshold: elements strictly above the threshold become 0;
// all other elements become maxVal (saturated to T).
class ThreshBinaryInv : public ThreshOp
{
public:
    ThreshBinaryInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}

    template<typename T>
    __device__ T operator()(const T& src) const
    {
        if ((float)src > thresh)
            return 0;

        return saturate_cast<T>(maxVal);
    }
};
// Truncating threshold: elements strictly above the threshold are replaced by
// the threshold (saturated to T); all other elements are returned unchanged.
// maxVal is accepted for interface symmetry with the other functors but is
// not used by this mode.
class ThreshTrunc : public ThreshOp
{
public:
    ThreshTrunc(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}

    template<typename T>
    __device__ T operator()(const T& src) const
    {
        // Only the threshold is converted to T. Elements that are not above
        // the threshold are passed through as-is instead of being routed
        // through float and back, which would alter 32-bit integer values
        // that are not exactly representable as float (magnitude > 2^24).
        // The comparison itself is the same one used by the sibling functors.
        return (float)src > thresh ? saturate_cast<T>(thresh) : src;
    }
};
// To-zero threshold: elements strictly above the threshold are kept;
// all other elements become 0. maxVal is not used by this mode.
class ThreshToZero : public ThreshOp
{
public:
    ThreshToZero(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}

    template<typename T>
    __device__ T operator()(const T& src) const
    {
        if ((float)src > thresh)
            return src;

        return 0;
    }
};
// Inverted to-zero threshold: elements strictly above the threshold become 0;
// all other elements are kept. maxVal is not used by this mode.
class ThreshToZeroInv : public ThreshOp
{
public:
    ThreshToZeroInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}

    template<typename T>
    __device__ T operator()(const T& src) const
    {
        if ((float)src > thresh)
            return 0;

        return src;
    }
};
// Builds the threshold functor Op from (thresh, maxVal) and applies it to
// every element of src via transform(), writing the results to dst on the
// given stream.
template <class Op, typename T>
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
    cudaStream_t stream)
{
    transform(src, dst, Op(thresh, maxVal), stream);
}
// Dispatches a threshold operation on elements of type T.
//
// src, dst  - untyped device matrices, reinterpreted as DevMem2D_<T>.
// thresh    - threshold passed to the selected functor.
// maxVal    - maximum value passed to the selected functor.
// type      - index into the dispatch table below, in this order:
//             0 ThreshBinary, 1 ThreshBinaryInv, 2 ThreshTrunc,
//             3 ThreshToZero, 4 ThreshToZeroInv.
//             NOTE(review): presumably mirrors the numeric values of the
//             host-side threshold type constants - confirm against the caller.
// stream    - CUDA stream forwarded to the caller function.
//
// An out-of-range type is reported through cv::gpu::error instead of indexing
// past the end of the table.
template <typename T>
void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,
    cudaStream_t stream)
{
    typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal,
        cudaStream_t stream);

    static const caller_t callers[] =
    {
        threshold_caller<ThreshBinary, T>,
        threshold_caller<ThreshBinaryInv, T>,
        threshold_caller<ThreshTrunc, T>,
        threshold_caller<ThreshToZero, T>,
        threshold_caller<ThreshToZeroInv, T>
    };

    const int callersCount = (int)(sizeof(callers) / sizeof(callers[0]));

    // Guard the table lookup: type comes from the caller and is otherwise
    // used unchecked as an array index.
    if (type < 0 || type >= callersCount)
    {
        cv::gpu::error("Unsupported threshold type", __FILE__, __LINE__);
        return;
    }

    callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
}
// Explicit instantiations of threshold_gpu, one per supported element type
// (uchar, schar, ushort, short, int, float).
template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
}}}

View File

@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;

View File

@@ -42,6 +42,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/transform.hpp"
using namespace cv::gpu::device;
@@ -55,63 +56,6 @@ namespace cv { namespace gpu { namespace matrix_operations {
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
// Selects the types used for one memory access when converting elements of
// type T into elements of type DT, keyed on the two element sizes in bytes.
//   read_type  - type used to load source elements
//   write_type - type used to store destination elements
//   shift      - number of T/DT elements covered by one such access
//
// Generic case: one element at a time, using T and DT directly.
template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
struct ReadWriteTraits
{
    typedef T read_type;
    typedef DT write_type;
    enum { shift = 1 };
};

// 1-byte source, 1-byte destination: four elements per access.
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 1, 1>
{
    typedef char4 read_type;
    typedef char4 write_type;
    enum { shift = 4 };
};

// 2-byte source, 1-byte destination: four elements per access.
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 2, 1>
{
    typedef short4 read_type;
    typedef char4 write_type;
    enum { shift = 4 };
};

// 4-byte source, 1-byte destination: four elements per access.
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 4, 1>
{
    typedef int4 read_type;
    typedef char4 write_type;
    enum { shift = 4 };
};

// 1-byte source, 2-byte destination: two elements per access.
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 1, 2>
{
    typedef char2 read_type;
    typedef short2 write_type;
    enum { shift = 2 };
};

// 2-byte source, 2-byte destination: two elements per access.
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 2, 2>
{
    typedef short2 read_type;
    typedef short2 write_type;
    enum { shift = 2 };
};

// 4-byte source, 2-byte destination: two elements per access.
template <typename T, typename DT>
struct ReadWriteTraits<T, DT, 4, 2>
{
    typedef int2 read_type;
    typedef short2 write_type;
    enum { shift = 2 };
};
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
@@ -276,88 +220,64 @@ namespace cv { namespace gpu { namespace matrix_operations {
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template <typename T, typename DT>
__global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
template <typename T, typename D>
class Convertor
{
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
public:
Convertor(double alpha_, double beta_): alpha(alpha_), beta(beta_) {}
const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
if (y < height)
__device__ D operator()(const T& src)
{
const T* src = (const T*)(srcmat + src_step * y);
DT* dst = (DT*)(dstmat + dst_step * y);
if ((x * shift) + shift - 1 < width)
{
read_type srcn_el = ((read_type*)src)[x];
write_type dstn_el;
const T* src1_el = (const T*) &srcn_el;
DT* dst1_el = (DT*) &dstn_el;
for (int i = 0; i < shift; ++i)
dst1_el[i] = saturate_cast<DT>(alpha * src1_el[i] + beta);
((write_type*)dst)[x] = dstn_el;
}
else
{
for (int i = 0; i < shift - 1; ++i)
if ((x * shift) + i < width)
dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
}
return saturate_cast<D>(alpha * src + beta);
}
}
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
template<typename T, typename DT>
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
private:
double alpha, beta;
};
template<typename T, typename D>
void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
{
const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
dim3 block(32, 8);
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
Convertor<T, D> op(alpha, beta);
transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
}
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta,
cudaStream_t stream = 0)
{
static CvtFunc tab[8][8] =
{
{cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta,
cudaStream_t stream);
{cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
{cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
{cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
{cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
{cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
{cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
{0,0,0,0,0,0,0,0}
static const caller_t tab[8][8] =
{
{cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
{cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
{cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
{cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
{cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
{cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
{cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
{0,0,0,0,0,0,0,0}
};
CvtFunc func = tab[sdepth][ddepth];
if (func == 0)
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
caller_t func = tab[sdepth][ddepth];
if (!func)
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
func(src, dst, alpha, beta, stream);
}
}}}

View File

@@ -43,7 +43,7 @@
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;

View File

@@ -46,7 +46,6 @@
//M*/
#include "internal_shared.hpp"
#include "surf_key_point.h"
#include "opencv2/gpu/device/limits_gpu.hpp"
using namespace cv::gpu;

View File

@@ -1,54 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_SURF_KEY_POINT_H__
#define __OPENCV_SURF_KEY_POINT_H__
namespace cv
{
namespace gpu
{
}
}
#endif // __OPENCV_SURF_KEY_POINT_H__

View File

@@ -1,130 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
//! Mask accessor
template<class T> struct MaskReader_
{
    // Mask image; the element at (y, x) is converted to bool on lookup.
    PtrStep_<T> mask;

    explicit MaskReader_(PtrStep_<T> mask_) : mask(mask_) {}

    // Returns the mask element at row y, column x, converted to bool.
    __device__ bool operator()(int y, int x) const
    {
        return mask.ptr(y)[x];
    }
};
//! Stub mask accessor
struct NoMask
{
    // Accepts every position: the (y, x) coordinates are ignored.
    __device__ bool operator()(int /*y*/, int /*x*/) const
    {
        return true;
    }
};
//! Transform kernels
// Unary element-wise kernel. Each thread handles the element at
// (blockDim.y * blockIdx.y + threadIdx.y, blockDim.x * blockIdx.x + threadIdx.x);
// if that position lies inside src and mask(row, col) is true, it writes
// op(src element) to the same position of dst.
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Threads outside the image, or masked out, have nothing to do.
    // The mask is only consulted for in-range positions.
    if (col >= src.cols || row >= src.rows || !mask(row, col))
        return;

    T value = src.ptr(row)[col];
    dst.ptr(row)[col] = op(value);
}
// Binary element-wise kernel. Each thread handles the element at
// (blockDim.y * blockIdx.y + threadIdx.y, blockDim.x * blockIdx.x + threadIdx.x);
// if that position lies inside src1 and mask(row, col) is true, it writes
// op(src1 element, src2 element) to the same position of dst.
// Only src1 carries dimensions; src2 and dst are indexed with the same
// coordinates without a separate bounds check.
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Threads outside the image, or masked out, have nothing to do.
    // The mask is only consulted for in-range positions.
    if (col >= src1.cols || row >= src1.rows || !mask(row, col))
        return;

    T1 lhs = src1.ptr(row)[col];
    T2 rhs = src2.ptr(row)[col];
    dst.ptr(row)[col] = op(lhs, rhs);
}
}}}
namespace cv
{
namespace gpu
{
// Host launcher for the unary device::transform kernel.
//
// src    - source matrix; its cols/rows define the processed area.
// dst    - destination matrix, written at the same coordinates as src.
// op     - unary functor applied to each source element on the device.
// stream - CUDA stream for the launch. When it is the default stream (0) the
//          call blocks until the kernel has finished; otherwise it returns
//          right after the launch has been issued.
//
// Uses 16x16 thread blocks and a grid sized to cover src. Launch errors are
// reported through cudaSafeCall.
template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
{
    // An empty source would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to process in that case.
    if (src.cols <= 0 || src.rows <= 0)
        return;

    dim3 threads(16, 16, 1);
    dim3 grid(1, 1, 1);

    grid.x = divUp(src.cols, threads.x);
    grid.y = divUp(src.rows, threads.y);

    device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);

    // A kernel launch does not return a status itself; query it explicitly so
    // that launch failures are not silently dropped.
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaThreadSynchronize() );
}
// Host launcher for the binary device::transform kernel.
//
// src1   - first source matrix; its cols/rows define the processed area.
// src2   - second source matrix, read at the same coordinates as src1.
// dst    - destination matrix, written at the same coordinates as src1.
// op     - binary functor applied to each pair of source elements on the device.
// stream - CUDA stream for the launch. When it is the default stream (0) the
//          call blocks until the kernel has finished; otherwise it returns
//          right after the launch has been issued.
//
// Uses 16x16 thread blocks and a grid sized to cover src1. Launch errors are
// reported through cudaSafeCall.
template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)
{
    // An empty source would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to process in that case.
    if (src1.cols <= 0 || src1.rows <= 0)
        return;

    dim3 threads(16, 16, 1);
    dim3 grid(1, 1, 1);

    grid.x = divUp(src1.cols, threads.x);
    grid.y = divUp(src1.rows, threads.y);

    device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);

    // A kernel launch does not return a status itself; query it explicitly so
    // that launch failures are not silently dropped.
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaThreadSynchronize() );
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__