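Added GPU threshold; reimplemented GPU convert on top of the shared transform helper and removed the local transform.hpp and surf_key_point.h headers.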

2011-01-24 10:11:02 +00:00
parent 4c4ff882ad
commit 8abdb3721f
17 changed files with 768 additions and 370 deletions
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -41,7 +41,8 @@
 //M*/

 #include "opencv2/gpu/device/vecmath.hpp"
-#include "transform.hpp"
+#include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/saturate_cast.hpp"
 #include "internal_shared.hpp"

 using namespace cv::gpu;
@@ -468,4 +469,112 @@ namespace cv { namespace gpu { namespace mathfunc
    template void max_gpu<int   >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
    template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
    template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+
+    
+    //////////////////////////////////////////////////////////////////////////
+    // threshold
+
+    // Shared state for the threshold functors that derive from it: the threshold and the replacement value.
+    class ThreshOp
+    {
+    protected:
+        float thresh;
+        float maxVal;
+    public:
+        ThreshOp(float thresh_, float maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
+    };
+
+    // Binary threshold: yields maxVal (saturated to T) where src exceeds thresh, and 0 everywhere else.
+    class ThreshBinary : public ThreshOp
+    {
+    public:
+        ThreshBinary(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+        template<typename T> __device__ T operator()(const T& src) const
+        {
+            if ((float)src > thresh) return saturate_cast<T>(maxVal);
+            return 0;
+        }
+    };
+
+    // Inverted binary threshold: yields 0 where src exceeds thresh, and maxVal (saturated to T) otherwise.
+    class ThreshBinaryInv : public ThreshOp
+    {
+    public:
+        ThreshBinaryInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+        template<typename T> __device__ T operator()(const T& src) const
+        {
+            if ((float)src > thresh) return 0;
+            return saturate_cast<T>(maxVal);
+        }
+    };
+
+    // Truncating threshold: values above thresh become thresh (saturated to T); all other values pass through.
+    class ThreshTrunc : public ThreshOp
+    {
+    public:
+        ThreshTrunc(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+        // Returns src itself when it does not exceed thresh, so wide integer values are never rounded through float.
+        template<typename T> __device__ T operator()(const T& src) const
+        {
+            return (float)src > thresh ? saturate_cast<T>(thresh) : src;
+        }
+    };
+
+    // To-zero threshold: keeps src where it exceeds thresh and replaces every other value with 0.
+    class ThreshToZero : public ThreshOp
+    {
+    public:
+        ThreshToZero(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+        template<typename T> __device__ T operator()(const T& src) const
+        {
+            if ((float)src > thresh) return src;
+            return 0;
+        }
+    };
+
+    // Inverted to-zero threshold: replaces src with 0 where it exceeds thresh and keeps every other value.
+    class ThreshToZeroInv : public ThreshOp
+    {
+    public:
+        ThreshToZeroInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}
+        template<typename T> __device__ T operator()(const T& src) const
+        {
+            if ((float)src > thresh) return 0;
+            return src;
+        }
+    };
+
+    // Builds the Op functor from (thresh, maxVal) and hands it to transform() together with src, dst and stream.
+    template <class Op, typename T>
+    void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal, cudaStream_t stream)
+    {
+        Op functor(thresh, maxVal);
+        transform(src, dst, functor, stream);
+    }
+
+    // Host entry point: picks the threshold functor by `type` (index into the table below) and runs it on src -> dst.
+    template <typename T>
+    void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream)
+    {
+        typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal, cudaStream_t stream);
+        static const caller_t callers[] =
+        {
+            threshold_caller<ThreshBinary, T>,
+            threshold_caller<ThreshBinaryInv, T>,
+            threshold_caller<ThreshTrunc, T>,
+            threshold_caller<ThreshToZero, T>,
+            threshold_caller<ThreshToZeroInv, T>
+        };
+        if (type < 0 || type >= (int)(sizeof(callers) / sizeof(callers[0]))) // never index past the end of the table
+            cv::gpu::error("Unsupported threshold type", __FILE__, __LINE__);
+        else
+            callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
+    }
+    // Explicit instantiations: the element types for which threshold_gpu is compiled into this translation unit.
+    template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
+    template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);
 }}}
--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
@@ -43,7 +43,7 @@
 #include "opencv2/gpu/device/limits_gpu.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/vecmath.hpp"
-#include "transform.hpp"
+#include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"

 using namespace cv::gpu;
--- a/modules/gpu/src/cuda/matrix_operations.cu
+++ b/modules/gpu/src/cuda/matrix_operations.cu
@@ -42,6 +42,7 @@

 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
+#include "opencv2/gpu/device/transform.hpp"

 using namespace cv::gpu::device;

@@ -55,63 +56,6 @@ namespace cv { namespace gpu { namespace matrix_operations {
    template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
    template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
    template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
-    
-    template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
-    struct ReadWriteTraits
-    {
-        enum {shift=1};
-
-        typedef T read_type;
-        typedef DT write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 1, 1>
-    {
-        enum {shift=4};
-
-        typedef char4 read_type;
-        typedef char4 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 2, 1>
-    {
-        enum {shift=4};
-
-        typedef short4 read_type;
-        typedef char4 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 4, 1>
-    {
-        enum {shift=4};
-
-        typedef int4 read_type;
-        typedef char4 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 1, 2>
-    {
-        enum {shift=2};
-
-        typedef char2 read_type;
-        typedef short2 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 2, 2>
-    {
-        enum {shift=2};
-
-        typedef short2 read_type;
-        typedef short2 write_type;
-    };
-    template <typename T, typename DT>
-    struct ReadWriteTraits<T, DT, 4, 2>
-    {
-        enum {shift=2};
-
-        typedef int2 read_type;
-        typedef short2 write_type;
-    };

 ///////////////////////////////////////////////////////////////////////////
 ////////////////////////////////// CopyTo /////////////////////////////////
@@ -276,88 +220,64 @@ namespace cv { namespace gpu { namespace matrix_operations {
 //////////////////////////////// ConvertTo ////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////

-    template <typename T, typename DT>
-    __global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
+    template <typename T, typename D>
+    class Convertor
    {
-        typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
-        typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
-        const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
+    public:
+        Convertor(double alpha_, double beta_): alpha(alpha_), beta(beta_) {}

-        const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
-        const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
-
-        if (y < height)
+        __device__ D operator()(const T& src)
        {
-            const T* src = (const T*)(srcmat + src_step * y);
-            DT* dst = (DT*)(dstmat + dst_step * y);
-            if ((x * shift) + shift - 1 < width)
-            {
-                read_type srcn_el = ((read_type*)src)[x];
-                write_type dstn_el;
-
-                const T* src1_el = (const T*) &srcn_el;
-                DT* dst1_el = (DT*) &dstn_el;
-
-                for (int i = 0; i < shift; ++i)
-                    dst1_el[i] =  saturate_cast<DT>(alpha * src1_el[i] + beta);
-
-                ((write_type*)dst)[x] = dstn_el;
-            }
-            else
-            {
-                for (int i = 0; i < shift - 1; ++i)
-                    if ((x * shift) + i < width)
-                        dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
-            }
+            return saturate_cast<D>(alpha * src + beta);
        }
-    }    

-    typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
-
-    template<typename T, typename DT>
-    void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
+    private:
+        double alpha, beta;
+    };
+    
+    template<typename T, typename D>
+    void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
    {
-        const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
-
-        dim3 block(32, 8);
-        dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
-
-        convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
-        if (stream == 0)
-            cudaSafeCall( cudaThreadSynchronize() );
+        Convertor<T, D> op(alpha, beta);
+        transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
    }

-    void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
+    void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, 
+        cudaStream_t stream = 0)
    {
-        static CvtFunc tab[8][8] =
-        {
-            {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
-            cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, 
+            cudaStream_t stream);

-            {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
-            cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
-
-            {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
-            cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
-
-            {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
-            cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
-
-            {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
-            cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
-
-            {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
-            cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
-
-            {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
-            cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
-
-            {0,0,0,0,0,0,0,0}
+        static const caller_t tab[8][8] =
+        {
+            {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
+            cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
+
+            {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
+            cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
+
+            {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
+            cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
+
+            {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
+            cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
+
+            {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
+            cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
+
+            {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
+            cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
+
+            {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
+            cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
+
+            {0,0,0,0,0,0,0,0}
        };

-        CvtFunc func = tab[sdepth][ddepth];
-        if (func == 0)
-            cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
-        func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
+        caller_t func = tab[sdepth][ddepth];
+        if (!func)
+            cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
+
+        func(src, dst, alpha, beta, stream);
    }
 }}}
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -43,7 +43,7 @@
 #include "opencv2/gpu/device/limits_gpu.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/vecmath.hpp"
-#include "transform.hpp"
+#include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"

 using namespace cv::gpu;
--- a/modules/gpu/src/cuda/surf.cu
+++ b/modules/gpu/src/cuda/surf.cu
@@ -46,7 +46,6 @@
 //M*/

 #include "internal_shared.hpp"
-#include "surf_key_point.h"
 #include "opencv2/gpu/device/limits_gpu.hpp"

 using namespace cv::gpu;
--- a/modules/gpu/src/cuda/surf_key_point.h
+++ b/modules/gpu/src/cuda/surf_key_point.h
@@ -1,54 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_SURF_KEY_POINT_H__
-#define __OPENCV_SURF_KEY_POINT_H__
-
-namespace cv
-{
-    namespace gpu
-    {
-       
-    }
-}
-
-#endif // __OPENCV_SURF_KEY_POINT_H__
--- a/modules/gpu/src/cuda/transform.hpp
+++ b/modules/gpu/src/cuda/transform.hpp
@@ -1,130 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_GPU_TRANSFORM_HPP__
-#define __OPENCV_GPU_TRANSFORM_HPP__
-
-#include "internal_shared.hpp"
-
-namespace cv { namespace gpu { namespace device
-{
-    //! Mask accessor
-    template<class T> struct MaskReader_
-    {
-        PtrStep_<T> mask;
-        explicit MaskReader_(PtrStep_<T> mask): mask(mask) {}                
-
-        __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
-    };
-
-    //! Stub mask accessor
-    struct NoMask 
-    {
-        __device__ bool operator()(int y, int x) const { return true; } 
-    };
-
-    //! Transform kernels
-
-    template <typename T, typename D, typename UnOp, typename Mask>
-    static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
-    {
-		const int x = blockDim.x * blockIdx.x + threadIdx.x;
-		const int y = blockDim.y * blockIdx.y + threadIdx.y;
-
-        if (x < src.cols && y < src.rows && mask(y, x))
-        {
-            T src_data = src.ptr(y)[x];
-            dst.ptr(y)[x] = op(src_data);
-        }
-    }
-
-    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
-    static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
-    {
-		const int x = blockDim.x * blockIdx.x + threadIdx.x;
-		const int y = blockDim.y * blockIdx.y + threadIdx.y;
-
-        if (x < src1.cols && y < src1.rows && mask(y, x))
-        {
-            T1 src1_data = src1.ptr(y)[x];
-            T2 src2_data = src2.ptr(y)[x];
-            dst.ptr(y)[x] = op(src1_data, src2_data);
-        }
-    }  
-}}}
-
-namespace cv 
-{ 
-    namespace gpu 
-    {
-        template <typename T, typename D, typename UnOp>
-        static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
-        {
-            dim3 threads(16, 16, 1);
-            dim3 grid(1, 1, 1);
-
-            grid.x = divUp(src.cols, threads.x);
-            grid.y = divUp(src.rows, threads.y);        
-
-            device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
-
-            if (stream == 0)
-                cudaSafeCall( cudaThreadSynchronize() );
-        }
-        template <typename T1, typename T2, typename D, typename BinOp>
-        static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)
-        {
-            dim3 threads(16, 16, 1);
-            dim3 grid(1, 1, 1);
-
-            grid.x = divUp(src1.cols, threads.x);
-            grid.y = divUp(src1.rows, threads.y);        
-
-            device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);
-
-            if (stream == 0)
-                cudaSafeCall( cudaThreadSynchronize() );            
-        }
-    }
-}
-
-#endif // __OPENCV_GPU_TRANSFORM_HPP__