Revert "Revert "Merge pull request #836 from jet47:gpu-modules""

This commit is contained in:
Vladislav Vinogradov
2013-06-04 13:32:35 +04:00
parent 10340fe234
commit 3eeaa9189c
472 changed files with 29894 additions and 23019 deletions

View File

@@ -73,8 +73,8 @@ namespace cv { namespace gpu { namespace cudev
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
}
const int width;
const D val;
int width;
D val;
};
template <typename D> struct BrdColConstant
@@ -98,8 +98,8 @@ namespace cv { namespace gpu { namespace cudev
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
const int height;
const D val;
int height;
D val;
};
template <typename D> struct BrdConstant
@@ -120,9 +120,9 @@ namespace cv { namespace gpu { namespace cudev
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
const int height;
const int width;
const D val;
int height;
int width;
D val;
};
//////////////////////////////////////////////////////////////
@@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(data[idx_col(x)]);
}
const int last_col;
int last_col;
};
template <typename D> struct BrdColReplicate
@@ -205,7 +205,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
}
const int last_row;
int last_row;
};
template <typename D> struct BrdReplicate
@@ -255,8 +255,8 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(data[idx_col(x)]);
}
const int last_col;
int last_col;
};
template <typename D> struct BrdColReflect101
@@ -339,7 +339,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
const int last_row;
int last_row;
};
template <typename D> struct BrdReflect101
@@ -389,8 +389,8 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
@@ -433,7 +433,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(data[idx_col(x)]);
}
const int last_col;
int last_col;
};
template <typename D> struct BrdColReflect
@@ -473,7 +473,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
const int last_row;
int last_row;
};
template <typename D> struct BrdReflect
@@ -523,8 +523,8 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
@@ -567,7 +567,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(data[idx_col(x)]);
}
const int width;
int width;
};
template <typename D> struct BrdColWrap
@@ -607,7 +607,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
const int height;
int height;
};
template <typename D> struct BrdWrap
@@ -664,8 +664,8 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int height;
const int width;
int height;
int width;
};
//////////////////////////////////////////////////////////////
@@ -683,8 +683,8 @@ namespace cv { namespace gpu { namespace cudev
return b.at(y, x, ptr);
}
const Ptr2D ptr;
const B b;
Ptr2D ptr;
B b;
};
// under win32 there is some bug with templated types that passed as kernel parameters
@@ -704,10 +704,10 @@ namespace cv { namespace gpu { namespace cudev
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
const Ptr2D src;
const int height;
const int width;
const D val;
Ptr2D src;
int height;
int width;
D val;
};
}}} // namespace cv { namespace gpu { namespace cudev

View File

@@ -87,15 +87,6 @@ namespace cv { namespace gpu
namespace cv { namespace gpu
{
enum
{
BORDER_REFLECT101_GPU = 0,
BORDER_REPLICATE_GPU,
BORDER_CONSTANT_GPU,
BORDER_REFLECT_GPU,
BORDER_WRAP_GPU
};
namespace cudev
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)

View File

@@ -43,6 +43,7 @@
#ifndef OPENCV_GPU_EMULATION_HPP_
#define OPENCV_GPU_EMULATION_HPP_
#include "common.hpp"
#include "warp_reduce.hpp"
namespace cv { namespace gpu { namespace cudev
@@ -131,8 +132,130 @@ namespace cv { namespace gpu { namespace cudev
return ::atomicMin(address, val);
#endif
}
}; // struct cmem
// Atomic operations on GLOBAL memory, with software emulation (atomicCAS
// spin loops) for floating-point variants that older compute capabilities
// lack in hardware. All members are static device functions; the struct is
// used purely as a namespace-like scope.
// NOTE(review): on architectures below the guarded threshold the emulated
// double/float min/max/add fall back to returning 0.0 — callers must not
// rely on the return value there.
struct glob
{
// int add: natively supported on all CUDA architectures.
static __device__ __forceinline__ int atomicAdd(int* address, int val)
{
return ::atomicAdd(address, val);
}
// unsigned int add: natively supported on all CUDA architectures.
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
return ::atomicAdd(address, val);
}
// float add: hardware atomicAdd(float*) exists from CC 2.0; below that,
// emulate with a 32-bit atomicCAS loop over the float's bit pattern.
static __device__ __forceinline__ float atomicAdd(float* address, float val)
{
#if __CUDA_ARCH__ >= 200
return ::atomicAdd(address, val);
#else
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
// Retry until no other thread modified *address between read and CAS.
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(val + __int_as_float(assumed)));
} while (assumed != old);
// Return the previous value, matching native atomicAdd semantics.
return __int_as_float(old);
#endif
}
// double add: requires 64-bit atomicCAS (CC 1.3+); otherwise a no-op stub
// returning 0.0 (arguments cast to void to silence unused warnings).
static __device__ __forceinline__ double atomicAdd(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
#else
(void) address;
(void) val;
return 0.0;
#endif
}
// int min: natively supported on all CUDA architectures.
static __device__ __forceinline__ int atomicMin(int* address, int val)
{
return ::atomicMin(address, val);
}
// float min: emulated via 32-bit CAS (needs CC 1.2+ for global atomicCAS);
// NOTE(review): the int-bit CAS compare may loop on NaN / -0.0 payloads —
// presumably inputs are ordinary ordered floats; confirm at call sites.
static __device__ __forceinline__ float atomicMin(float* address, float val)
{
#if __CUDA_ARCH__ >= 120
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(::fminf(val, __int_as_float(assumed))));
} while (assumed != old);
return __int_as_float(old);
#else
(void) address;
(void) val;
return 0.0f;
#endif
}
// double min: emulated via 64-bit CAS (CC 1.3+); stub returning 0.0 below.
static __device__ __forceinline__ double atomicMin(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
#else
(void) address;
(void) val;
return 0.0;
#endif
}
// int max: natively supported on all CUDA architectures.
static __device__ __forceinline__ int atomicMax(int* address, int val)
{
return ::atomicMax(address, val);
}
// float max: emulated via 32-bit CAS (CC 1.2+); same NaN/-0.0 caveat as
// the float atomicMin above.
static __device__ __forceinline__ float atomicMax(float* address, float val)
{
#if __CUDA_ARCH__ >= 120
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
} while (assumed != old);
return __int_as_float(old);
#else
(void) address;
(void) val;
return 0.0f;
#endif
}
// double max: emulated via 64-bit CAS (CC 1.3+); stub returning 0.0 below.
static __device__ __forceinline__ double atomicMax(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
#else
(void) address;
(void) val;
return 0.0;
#endif
}
};
};
}; //struct Emulation
}}} // namespace cv { namespace gpu { namespace cudev
#endif /* OPENCV_GPU_EMULATION_HPP_ */

View File

@@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace cudev
return src(__float2int_rz(y), __float2int_rz(x));
}
const Ptr2D src;
Ptr2D src;
};
template <typename Ptr2D> struct LinearFilter
@@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
Ptr2D src;
};
template <typename Ptr2D> struct CubicFilter
@@ -166,7 +166,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<elem_type>(res);
}
const Ptr2D src;
Ptr2D src;
};
// for integer scaling
template <typename Ptr2D> struct IntegerAreaFilter
@@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
Ptr2D src;
float scale_x, scale_y ,scale;
};
@@ -269,7 +269,7 @@ namespace cv { namespace gpu { namespace cudev
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
Ptr2D src;
float scale_x, scale_y;
int width, haight;
};

View File

@@ -554,8 +554,8 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
: thresh(other.thresh), maxVal(other.maxVal) {}
const T thresh;
const T maxVal;
T thresh;
T maxVal;
};
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
@@ -571,8 +571,8 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
: thresh(other.thresh), maxVal(other.maxVal) {}
const T thresh;
const T maxVal;
T thresh;
T maxVal;
};
template <typename T> struct thresh_trunc_func : unary_function<T, T>
@@ -588,7 +588,7 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
: thresh(other.thresh) {}
const T thresh;
T thresh;
};
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
@@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
: thresh(other.thresh) {}
const T thresh;
T thresh;
};
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
@@ -620,7 +620,7 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
: thresh(other.thresh) {}
const T thresh;
T thresh;
};
// Function Object Adaptors
@@ -636,7 +636,7 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ unary_negate() {}
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
const Predicate pred;
Predicate pred;
};
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
@@ -657,7 +657,7 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ binary_negate() {}
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
const Predicate pred;
Predicate pred;
};
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
@@ -677,8 +677,8 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ binder1st() {}
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
const Op op;
const typename Op::first_argument_type arg1;
Op op;
typename Op::first_argument_type arg1;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
@@ -698,8 +698,8 @@ namespace cv { namespace gpu { namespace cudev
__host__ __device__ __forceinline__ binder2nd() {}
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
const Op op;
const typename Op::second_argument_type arg2;
Op op;
typename Op::second_argument_type arg2;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)

View File

@@ -74,10 +74,6 @@
namespace cv { namespace gpu {
CV_EXPORTS cv::String getNppErrorMessage(int code);
CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
// Converts CPU border extrapolation mode into GPU internal analogue.
// Returns true if the GPU analogue exists, false otherwise.
CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
}}
#ifndef HAVE_CUDA