fixed gpu::cvtColor

2011-06-15 11:57:08 +00:00
parent ada3e6e624
commit 0d09352fca
3 changed files with 173 additions and 216 deletions
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@@ -55,18 +55,11 @@ using namespace cv::gpu::device;

 namespace cv { namespace gpu { namespace color
 {
-    template<typename T> struct ColorChannel;
-    template<> struct ColorChannel<uchar>
+    template<typename T> struct ColorChannel // primary template: serves every channel type that has no explicit specialization (float is specialized right after)
    {
        typedef float worktype_f;
-        static __device__ __forceinline__ uchar max() { return UCHAR_MAX; }
-        static __device__ __forceinline__ uchar half() { return (uchar)(max()/2 + 1); }
-    };
-    template<> struct ColorChannel<ushort>
-    {
-        typedef float worktype_f;
-        static __device__ __forceinline__ ushort max() { return USHRT_MAX; }
-        static __device__ __forceinline__ ushort half() { return (ushort)(max()/2 + 1); }
+        static __device__ __forceinline__ T max() { return numeric_limits_gpu<T>::max(); } // upper bound is taken from numeric_limits_gpu<T>
+        static __device__ __forceinline__ T half() { return (T)(max()/2 + 1); } // max()/2 + 1, converted back to T
    };
    template<> struct ColorChannel<float>
    {
@@ -140,9 +133,9 @@ namespace cv { namespace gpu { namespace color
    void RGB2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
    {
        typedef void (*RGB2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
-        static const RGB2RGB_caller_t RGB2RGB_callers[2][2] = 
+        static const RGB2RGB_caller_t RGB2RGB_callers[2][2] =
        {
-            {RGB2RGB_caller<uchar, 3, 3>, RGB2RGB_caller<uchar, 3, 4>}, 
+            {RGB2RGB_caller<uchar, 3, 3>, RGB2RGB_caller<uchar, 3, 4>},
            {RGB2RGB_caller<uchar, 4, 3>, RGB2RGB_caller<uchar, 4, 4>}
        };

@@ -152,9 +145,9 @@ namespace cv { namespace gpu { namespace color
    void RGB2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
    {
        typedef void (*RGB2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
-        static const RGB2RGB_caller_t RGB2RGB_callers[2][2] = 
+        static const RGB2RGB_caller_t RGB2RGB_callers[2][2] =
        {
-            {RGB2RGB_caller<ushort, 3, 3>, RGB2RGB_caller<ushort, 3, 4>}, 
+            {RGB2RGB_caller<ushort, 3, 3>, RGB2RGB_caller<ushort, 3, 4>},
            {RGB2RGB_caller<ushort, 4, 3>, RGB2RGB_caller<ushort, 4, 4>}
        };

@@ -164,9 +157,9 @@ namespace cv { namespace gpu { namespace color
    void RGB2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
    {
        typedef void (*RGB2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
-        static const RGB2RGB_caller_t RGB2RGB_callers[2][2] = 
+        static const RGB2RGB_caller_t RGB2RGB_callers[2][2] =
        {
-            {RGB2RGB_caller<float, 3, 3>, RGB2RGB_caller<float, 3, 4>}, 
+            {RGB2RGB_caller<float, 3, 3>, RGB2RGB_caller<float, 3, 4>},
            {RGB2RGB_caller<float, 4, 3>, RGB2RGB_caller<float, 4, 4>}
        };

@@ -175,12 +168,12 @@ namespace cv { namespace gpu { namespace color

 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////

-    template <int GREEN_BITS> struct RGB5x52RGBConverter;    
+    template <int GREEN_BITS> struct RGB5x52RGBConverter;
    template <> struct RGB5x52RGBConverter<5>
    {
        template <typename D>
        static __device__ __forceinline__ void cvt(uint src, D& dst, int bidx)
-        {            
+        {
            (&dst.x)[bidx] = (uchar)(src << 3);
            dst.y = (uchar)((src >> 2) & ~7);
            (&dst.x)[bidx ^ 2] = (uchar)((src >> 7) & ~7);
@@ -191,7 +184,7 @@ namespace cv { namespace gpu { namespace color
    {
        template <typename D>
        static __device__ __forceinline__ void cvt(uint src, D& dst, int bidx)
-        {            
+        {
            (&dst.x)[bidx] = (uchar)(src << 3);
            dst.y = (uchar)((src >> 3) & ~3);
            (&dst.x)[bidx ^ 2] = (uchar)((src >> 8) & ~7);
@@ -218,7 +211,7 @@ namespace cv { namespace gpu { namespace color
    };

    template <int GREEN_BITS> struct RGB2RGB5x5Converter;
-    template<> struct RGB2RGB5x5Converter<6> 
+    template<> struct RGB2RGB5x5Converter<6>
    {
        template <typename T>
        static __device__ __forceinline__ ushort cvt(const T& src, int bidx)
@@ -226,7 +219,7 @@ namespace cv { namespace gpu { namespace color
            return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
        }
    };
-    template<> struct RGB2RGB5x5Converter<5> 
+    template<> struct RGB2RGB5x5Converter<5>
    {
        static __device__ __forceinline__ ushort cvt(const uchar3& src, int bidx)
        {
@@ -236,7 +229,7 @@ namespace cv { namespace gpu { namespace color
        {
            return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7) | (src.w ? 0x8000 : 0));
        }
-    };   
+    };

    template<int SRCCN, int GREEN_BITS> struct RGB2RGB5x5
    {
@@ -264,7 +257,7 @@ namespace cv { namespace gpu { namespace color
    void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream)
    {
        typedef void (*RGB5x52RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
-        static const RGB5x52RGB_caller_t RGB5x52RGB_callers[2][2] = 
+        static const RGB5x52RGB_caller_t RGB5x52RGB_callers[2][2] =
        {
            {RGB5x52RGB_caller<5, 3>, RGB5x52RGB_caller<5, 4>},
            {RGB5x52RGB_caller<6, 3>, RGB5x52RGB_caller<6, 4>}
@@ -283,7 +276,7 @@ namespace cv { namespace gpu { namespace color
    void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream)
    {
        typedef void (*RGB2RGB5x5_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
-        static const RGB2RGB5x5_caller_t RGB2RGB5x5_callers[2][2] = 
+        static const RGB2RGB5x5_caller_t RGB2RGB5x5_callers[2][2] =
        {
            {RGB2RGB5x5_caller<3, 5>, RGB2RGB5x5_caller<3, 6>},
            {RGB2RGB5x5_caller<4, 5>, RGB2RGB5x5_caller<4, 6>}
@@ -303,7 +296,7 @@ namespace cv { namespace gpu { namespace color
        {
            dst_t dst;

-            dst.z = dst.y = dst.x = src;            
+            dst.z = dst.y = dst.x = src;
            setAlpha(dst, ColorChannel<T>::max());

            return dst;
@@ -311,14 +304,14 @@ namespace cv { namespace gpu { namespace color
    };

    template <int GREEN_BITS> struct Gray2RGB5x5Converter;
-    template<> struct Gray2RGB5x5Converter<6> 
+    template<> struct Gray2RGB5x5Converter<6> // for an 8-bit t: top 5 bits -> bits 0-4, top 6 bits -> bits 5-10, top 5 bits -> bits 11-15
    {
        static __device__ __forceinline__ ushort cvt(uint t)
        {
            return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
        }
    };
-    template<> struct Gray2RGB5x5Converter<5> 
+    template<> struct Gray2RGB5x5Converter<5>
    {
        static __device__ __forceinline__ ushort cvt(uint t)
        {
@@ -379,7 +372,7 @@ namespace cv { namespace gpu { namespace color
    void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream)
    {
        typedef void (*Gray2RGB5x5_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
-        static const Gray2RGB5x5_caller_t Gray2RGB5x5_callers[2] = 
+        static const Gray2RGB5x5_caller_t Gray2RGB5x5_callers[2] =
        {
            Gray2RGB5x5_caller<5>, Gray2RGB5x5_caller<6>
        };
@@ -392,7 +385,7 @@ namespace cv { namespace gpu { namespace color
    #undef R2Y
    #undef G2Y
    #undef B2Y
-    
+
    enum
    {
        yuv_shift  = 14,
@@ -404,20 +397,20 @@ namespace cv { namespace gpu { namespace color
    };

    template <int GREEN_BITS> struct RGB5x52GrayConverter;
-    template<> struct RGB5x52GrayConverter<6> 
+    template<> struct RGB5x52GrayConverter<6> // middle field is 6 bits wide (mask 0xfc); the two outer fields are 5 bits (mask 0xf8)
    {
        static __device__ __forceinline__ uchar cvt(uint t)
        {
            return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);
        }
    };
-    template<> struct RGB5x52GrayConverter<5> 
+    template<> struct RGB5x52GrayConverter<5> // all three fields are 5 bits wide (mask 0xf8), taken at shifts <<3, >>2 and >>7
    {
        static __device__ __forceinline__ uchar cvt(uint t)
        {
            return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);
        }
-    };   
+    };

    template<int GREEN_BITS> struct RGB5x52Gray
    {
@@ -458,7 +451,7 @@ namespace cv { namespace gpu { namespace color

    private:
        int bidx;
-    }; 
+    };

    template <typename T, int SRCCN>
    void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
@@ -489,7 +482,7 @@ namespace cv { namespace gpu { namespace color
        RGB2Gray_caller_t RGB2Gray_callers[] = {RGB2Gray_caller<float, 3>, RGB2Gray_caller<float, 4>};

        RGB2Gray_callers[srccn - 3](src, dst, bidx, stream);
-    }    
+    }

    template <int GREEN_BITS>
    void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
@@ -501,7 +494,7 @@ namespace cv { namespace gpu { namespace color
    void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream)
    {
        typedef void (*RGB5x52Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
-        static const RGB5x52Gray_caller_t RGB5x52Gray_callers[2] = 
+        static const RGB5x52Gray_caller_t RGB5x52Gray_callers[2] =
        {
            RGB5x52Gray_caller<5>, RGB5x52Gray_caller<6>
        };
@@ -513,7 +506,7 @@ namespace cv { namespace gpu { namespace color

    __constant__ int cYCrCbCoeffs_i[5];
    __constant__ float cYCrCbCoeffs_f[5];
-    
+
    template <typename T, typename D>
    __device__ __forceinline__ void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
    {
@@ -571,7 +564,7 @@ namespace cv { namespace gpu { namespace color
    private:
        int bidx;
    };
-    
+
    template <typename T, typename D>
    __device__ __forceinline__ void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
    {
@@ -642,7 +635,7 @@ namespace cv { namespace gpu { namespace color
    void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream);
-        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] = 
+        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] =
        {
            {RGB2YCrCb_caller<uchar, 3, 3>, RGB2YCrCb_caller<uchar, 3, 4>},
            {RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
@@ -654,7 +647,7 @@ namespace cv { namespace gpu { namespace color
    void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream);
-        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] = 
+        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] =
        {
            {RGB2YCrCb_caller<ushort, 3, 3>, RGB2YCrCb_caller<ushort, 3, 4>},
            {RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}
@@ -666,7 +659,7 @@ namespace cv { namespace gpu { namespace color
    void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream);
-        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] = 
+        static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] =
        {
            {RGB2YCrCb_caller<float, 3, 3>, RGB2YCrCb_caller<float, 3, 4>},
            {RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
@@ -674,7 +667,7 @@ namespace cv { namespace gpu { namespace color

        RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, coeffs, stream);
    }
-    
+
    template <typename T, int SRCCN, int DSTCN>
    void YCrCb2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream)
    {
@@ -686,7 +679,7 @@ namespace cv { namespace gpu { namespace color
    void YCrCb2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream);
-        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] = 
+        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] =
        {
            {YCrCb2RGB_caller<uchar, 3, 3>, YCrCb2RGB_caller<uchar, 3, 4>},
            {YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
@@ -698,24 +691,24 @@ namespace cv { namespace gpu { namespace color
    void YCrCb2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream);
-        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] = 
+        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] =
        {
            {YCrCb2RGB_caller<ushort, 3, 3>, YCrCb2RGB_caller<ushort, 3, 4>},
            {YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}
        };
-        
+        // Row = srccn-3, column = dstcn-3. NOTE(review): no range check here - confirm callers only pass 3 or 4.
        YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, coeffs, stream);
    }

    void YCrCb2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, const void* coeffs, cudaStream_t stream);
-        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] = 
+        static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] =
        {
            {YCrCb2RGB_caller<float, 3, 3>, YCrCb2RGB_caller<float, 3, 4>},
            {YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
        };
-        
+        // Row = srccn-3, column = dstcn-3. NOTE(review): no range check here - confirm callers only pass 3 or 4.
        YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, coeffs, stream);
    }

@@ -777,15 +770,15 @@ namespace cv { namespace gpu { namespace color
    __device__ __forceinline__ void XYZ2RGBConvert(const T& src, D* dst)
    {
        dst[0] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
-	    dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
-	    dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
+        dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift)); // output 1: integer coefficients 3..5
+        dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift)); // output 2: integer coefficients 6..8
    }
    template <typename T>
    __device__ __forceinline__ void XYZ2RGBConvert(const T& src, float* dst)
    {
        dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
-	    dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
-	    dst[2] = src.x * cXYZ_D65f[6] + src.y * cXYZ_D65f[7] + src.z * cXYZ_D65f[8];
+        dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5]; // output 1: float coefficients 3..5
+        dst[2] = src.x * cXYZ_D65f[6] + src.y * cXYZ_D65f[7] + src.z * cXYZ_D65f[8]; // output 2: float coefficients 6..8
    }

    template <typename T> struct XYZ2RGBBase
@@ -811,7 +804,7 @@ namespace cv { namespace gpu { namespace color
        typedef typename RGB2XYZBase<T>::coeff_t coeff_t;
        typedef typename TypeVec<T, SRCCN>::vec_t src_t;
        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
-        
+
        explicit XYZ2RGB(const coeff_t coeffs[9]) : XYZ2RGBBase<T>(coeffs) {}

        __device__ __forceinline__ dst_t operator()(const src_t& src) const
@@ -834,7 +827,7 @@ namespace cv { namespace gpu { namespace color
    void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream);
-        static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] = 
+        static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] =
        {
            {RGB2XYZ_caller<uchar, 3, 3>, RGB2XYZ_caller<uchar, 3, 4>},
            {RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
@@ -846,27 +839,27 @@ namespace cv { namespace gpu { namespace color
    void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream);
-        static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] = 
+        static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] =
        {
            {RGB2XYZ_caller<ushort, 3, 3>, RGB2XYZ_caller<ushort, 3, 4>},
            {RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}
        };
-        
+        // Row = srccn-3, column = dstcn-3. NOTE(review): no range check here - confirm callers only pass 3 or 4.
        RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, coeffs, stream);
    }

    void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream);
-        static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] = 
+        static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] =
        {
            {RGB2XYZ_caller<float, 3, 3>, RGB2XYZ_caller<float, 3, 4>},
            {RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
        };
-        
+        // Row = srccn-3, column = dstcn-3. NOTE(review): no range check here - confirm callers only pass 3 or 4.
        RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, coeffs, stream);
    }
-    
+
    template <typename T, int SRCCN, int DSTCN>
    void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream)
    {
@@ -878,7 +871,7 @@ namespace cv { namespace gpu { namespace color
    void XYZ2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream);
-        static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] = 
+        static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] =
        {
            {XYZ2RGB_caller<uchar, 3, 3>, XYZ2RGB_caller<uchar, 3, 4>},
            {XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
@@ -890,7 +883,7 @@ namespace cv { namespace gpu { namespace color
    void XYZ2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream);
-        static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] = 
+        static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] =
        {
            {XYZ2RGB_caller<ushort, 3, 3>, XYZ2RGB_caller<ushort, 3, 4>},
            {XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}
@@ -902,12 +895,12 @@ namespace cv { namespace gpu { namespace color
    void XYZ2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const void* coeffs, cudaStream_t stream)
    {
        typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, const void* coeffs, cudaStream_t stream);
-        static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] = 
+        static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] =
        {
            {XYZ2RGB_caller<float, 3, 3>, XYZ2RGB_caller<float, 3, 4>},
            {XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
        };
-        
+        // Row = srccn-3, column = dstcn-3. NOTE(review): no range check here - confirm callers only pass 3 or 4.
        XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, coeffs, stream);
    }

@@ -916,12 +909,11 @@ namespace cv { namespace gpu { namespace color
    __constant__ int cHsvDivTable   [256] = {0, 1044480, 522240, 348160, 261120, 208896, 174080, 149211, 130560, 116053, 104448, 94953, 87040, 80345, 74606, 69632, 65280, 61440, 58027, 54973, 52224, 49737, 47476, 45412, 43520, 41779, 40172, 38684, 37303, 36017, 34816, 33693, 32640, 31651, 30720, 29842, 29013, 28229, 27486, 26782, 26112, 25475, 24869, 24290, 23738, 23211, 22706, 22223, 21760, 21316, 20890, 20480, 20086, 19707, 19342, 18991, 18651, 18324, 18008, 17703, 17408, 17123, 16846, 16579, 16320, 16069, 15825, 15589, 15360, 15137, 14921, 14711, 14507, 14308, 14115, 13926, 13743, 13565, 13391, 13221, 13056, 12895, 12738, 12584, 12434, 12288, 12145, 12006, 11869, 11736, 11605, 11478, 11353, 11231, 11111, 10995, 10880, 10768, 10658, 10550, 10445, 10341, 10240, 10141, 10043, 9947, 9854, 9761, 9671, 9582, 9495, 9410, 9326, 9243, 9162, 9082, 9004, 8927, 8852, 8777, 8704, 8632, 8561, 8492, 8423, 8356, 8290, 8224, 8160, 8097, 8034, 7973, 7913, 7853, 7795, 7737, 7680, 7624, 7569, 7514, 7461, 7408, 7355, 7304, 7253, 7203, 7154, 7105, 7057, 7010, 6963, 6917, 6872, 6827, 6782, 6739, 6695, 6653, 6611, 6569, 6528, 6487, 6447, 6408, 6369, 6330, 6292, 6254, 6217, 6180, 6144, 6108, 6073, 6037, 6003, 5968, 5935, 5901, 5868, 5835, 5803, 5771, 5739, 5708, 5677, 5646, 5615, 5585, 5556, 5526, 5497, 5468, 5440, 5412, 5384, 5356, 5329, 5302, 5275, 5249, 5222, 5196, 5171, 5145, 5120, 5095, 5070, 5046, 5022, 4998, 4974, 4950, 4927, 4904, 4881, 4858, 4836, 4813, 4791, 4769, 4748, 4726, 4705, 4684, 4663, 4642, 4622, 4601, 4581, 4561, 4541, 4522, 4502, 4483, 4464, 4445, 4426, 4407, 4389, 4370, 4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229, 4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096};
    __constant__ int cHsvDivTable180[256] = {0, 122880, 61440, 40960, 30720, 24576, 20480, 17554, 15360, 13653, 12288, 11171, 10240, 9452, 8777, 8192, 7680, 7228, 6827, 6467, 6144, 5851, 5585, 5343, 5120, 4915, 4726, 4551, 4389, 4237, 4096, 3964, 3840, 3724, 3614, 3511, 3413, 3321, 3234, 3151, 3072, 2997, 2926, 2858, 2793, 2731, 2671, 2614, 2560, 2508, 2458, 2409, 2363, 2318, 2276, 2234, 2194, 2156, 2119, 2083, 2048, 2014, 1982, 1950, 1920, 1890, 1862, 1834, 1807, 1781, 1755, 1731, 1707, 1683, 1661, 1638, 1617, 1596, 1575, 1555, 1536, 1517, 1499, 1480, 1463, 1446, 1429, 1412, 1396, 1381, 1365, 1350, 1336, 1321, 1307, 1293, 1280, 1267, 1254, 1241, 1229, 1217, 1205, 1193, 1182, 1170, 1159, 1148, 1138, 1127, 1117, 1107, 1097, 1087, 1078, 1069, 1059, 1050, 1041, 1033, 1024, 1016, 1007, 999, 991, 983, 975, 968, 960, 953, 945, 938, 931, 924, 917, 910, 904, 897, 890, 884, 878, 871, 865, 859, 853, 847, 842, 836, 830, 825, 819, 814, 808, 803, 798, 793, 788, 783, 778, 773, 768, 763, 759, 754, 749, 745, 740, 736, 731, 727, 723, 719, 714, 710, 706, 702, 698, 694, 690, 686, 683, 679, 675, 671, 668, 664, 661, 657, 654, 650, 647, 643, 640, 637, 633, 630, 627, 624, 621, 617, 614, 611, 608, 605, 602, 599, 597, 594, 591, 588, 585, 582, 580, 577, 574, 572, 569, 566, 564, 561, 559, 556, 554, 551, 549, 546, 544, 541, 539, 537, 534, 532, 530, 527, 525, 523, 521, 518, 516, 514, 512, 510, 508, 506, 504, 502, 500, 497, 495, 493, 492, 490, 488, 486, 484, 482};
    __constant__ int cHsvDivTable256[256] = {0, 174763, 87381, 58254, 43691, 34953, 29127, 24966, 21845, 19418, 17476, 15888, 14564, 13443, 12483, 11651, 10923, 10280, 9709, 9198, 8738, 8322, 7944, 7598, 7282, 6991, 6722, 6473, 6242, 6026, 5825, 5638, 5461, 5296, 5140, 4993, 4855, 4723, 4599, 4481, 4369, 4263, 4161, 4064, 3972, 3884, 3799, 3718, 3641, 3567, 3495, 3427, 3361, 3297, 3236, 3178, 3121, 3066, 3013, 2962, 2913, 2865, 2819, 2774, 2731, 2689, 2648, 2608, 2570, 2533, 2497, 2461, 2427, 2394, 2362, 2330, 2300, 2270, 2241, 2212, 2185, 2158, 2131, 2106, 2081, 2056, 2032, 2009, 1986, 1964, 1942, 1920, 1900, 1879, 1859, 1840, 1820, 1802, 1783, 1765, 1748, 1730, 1713, 1697, 1680, 1664, 1649, 1633, 1618, 1603, 1589, 1574, 1560, 1547, 1533, 1520, 1507, 1494, 1481, 1469, 1456, 1444, 1432, 1421, 1409, 1398, 1387, 1376, 1365, 1355, 1344, 1334, 1324, 1314, 1304, 1295, 1285, 1276, 1266, 1257, 1248, 1239, 1231, 1222, 1214, 1205, 1197, 1189, 1181, 1173, 1165, 1157, 1150, 1142, 1135, 1128, 1120, 1113, 1106, 1099, 1092, 1085, 1079, 1072, 1066, 1059, 1053, 1046, 1040, 1034, 1028, 1022, 1016, 1010, 1004, 999, 993, 987, 982, 976, 971, 966, 960, 955, 950, 945, 940, 935, 930, 925, 920, 915, 910, 906, 901, 896, 892, 887, 883, 878, 874, 869, 865, 861, 857, 853, 848, 844, 840, 836, 832, 828, 824, 820, 817, 813, 809, 805, 802, 798, 794, 791, 787, 784, 780, 777, 773, 770, 767, 763, 760, 757, 753, 750, 747, 744, 741, 737, 734, 731, 728, 725, 722, 719, 716, 713, 710, 708, 705, 702, 699, 696, 694, 691, 688, 685};
-    
-    template <int HR, typename D>
-    __device__ void RGB2HSVConvert(const uchar* src, D& dst, int bidx)
+
+    template <typename D> __device__ void RGB2HSVConvert(const uchar* src, D& dst, int bidx, int hr)
    {
        const int hsv_shift = 12;
-        const int* hdiv_table = HR == 180 ? cHsvDivTable180 : cHsvDivTable256;
+        const int* hdiv_table = hr == 180 ? cHsvDivTable180 : cHsvDivTable256;

        int b = src[bidx], g = src[1], r = src[bidx^2];
        int h, s, v = b;
@@ -940,16 +932,15 @@ namespace cv { namespace gpu { namespace color
        s = (diff * cHsvDivTable[v] + (1 << (hsv_shift-1))) >> hsv_shift;
        h = (vr & (g - b)) + (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
        h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
-        h += h < 0 ? HR : 0;
+        h += h < 0 ? hr : 0;

        dst.x = saturate_cast<uchar>(h);
        dst.y = (uchar)s;
        dst.z = (uchar)v;
    }
-    template<int HR, typename D> 
-    __device__ void RGB2HSVConvert(const float* src, D& dst, int bidx)
+    template<typename D> __device__ void RGB2HSVConvert(const float* src, D& dst, int bidx, int hr)
    {
-        const float hscale = HR * (1.f / 360.f);
+        const float hscale = hr * (1.f / 360.f);

        float b = src[bidx], g = src[1], r = src[bidx^2];
        float h, s, v;
@@ -980,34 +971,34 @@ namespace cv { namespace gpu { namespace color
        dst.z = v;
    }

-    template <int SRCCN, int DSTCN, int HR, typename T> struct RGB2HSV
+    template <int SRCCN, int DSTCN, typename T> struct RGB2HSV // per-pixel functor; the hue range is a runtime member, not a template argument
    {
        typedef typename TypeVec<T, SRCCN>::vec_t src_t;
        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

-        explicit RGB2HSV(int bidx) : bidx(bidx) {}
+        RGB2HSV(int bidx, int hr) : bidx(bidx), hr(hr) {} // both values are stored and read back in operator()

        __device__ __forceinline__ dst_t operator()(const src_t& src) const
        {
            dst_t dst;
-            RGB2HSVConvert<HR>(&src.x, dst, bidx);
+            RGB2HSVConvert(&src.x, dst, bidx, hr); // the pointer type of &src.x selects the uchar or float overload
            return dst;
        }

    private:
        int bidx;
+        int hr; // hue range handed to RGB2HSVConvert on every call
    };

-    __constant__ int cHsvSectorData[6][3] = 
+    __constant__ int cHsvSectorData[6][3] =
    {
        {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}
    };

-    template <int HR, typename T>
-    __device__ void HSV2RGBConvert(const T& src, float* dst, int bidx)
+    template <typename T> __device__ void HSV2RGBConvert(const T& src, float* dst, int bidx, int hr)
    {
-        const float hscale = 6.f / HR;
-        
+        const float hscale = 6.f / hr;
+
        float h = src.x, s = src.y, v = src.z;
        float b, g, r;

@@ -1039,8 +1030,7 @@ namespace cv { namespace gpu { namespace color
        dst[1] = g;
        dst[bidx^2] = r;
    }
-    template <int HR, typename T>
-    __device__ void HSV2RGBConvert(const T& src, uchar* dst, int bidx)
+    template <typename T> __device__ void HSV2RGBConvert(const T& src, uchar* dst, int bidx, int hr)
    {
        float3 buf;

@@ -1048,51 +1038,44 @@ namespace cv { namespace gpu { namespace color
        buf.y = src.y * (1.f/255.f);
        buf.z = src.z * (1.f/255.f);

-        HSV2RGBConvert<HR>(buf, &buf.x, bidx);
+        HSV2RGBConvert(buf, &buf.x, bidx, hr);

        dst[0] = saturate_cast<uchar>(buf.x * 255.f);
        dst[1] = saturate_cast<uchar>(buf.y * 255.f);
        dst[2] = saturate_cast<uchar>(buf.z * 255.f);
    }

-    template <int SRCCN, int DSTCN, int HR, typename T> struct HSV2RGB
+    template <int SRCCN, int DSTCN, typename T> struct HSV2RGB // per-pixel functor; the hue range is a runtime member, not a template argument
    {
        typedef typename TypeVec<T, SRCCN>::vec_t src_t;
        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

-        explicit HSV2RGB(int bidx) : bidx(bidx) {}
+        HSV2RGB(int bidx, int hr) : bidx(bidx), hr(hr) {} // both values are stored and read back in operator()

        __device__ __forceinline__ dst_t operator()(const src_t& src) const
        {
            dst_t dst;
-            HSV2RGBConvert<HR>(src, &dst.x, bidx);
+            HSV2RGBConvert(src, &dst.x, bidx, hr); // the pointer type of &dst.x selects the float or uchar overload
            setAlpha(dst, ColorChannel<T>::max());
            return dst;
        }

    private:
        int bidx;
+        int hr; // hue range handed to HSV2RGBConvert on every call
    };

    template <typename T, int SRCCN, int DSTCN>
    void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
    {
-        if (hrange == 180)
-        {
-            RGB2HSV<SRCCN, DSTCN, 180, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
-        else
-        {
-            RGB2HSV<SRCCN, DSTCN, 256, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
+        RGB2HSV<SRCCN, DSTCN, T> cvt(bidx, hrange); // hrange is forwarded as-is. NOTE(review): the uchar path picks its divisor table by hrange == 180 vs. anything else - confirm callers pass only 180 or 256
+        callConvert(src, dst, cvt, stream); // runs the functor over src into dst on the given stream (callConvert is defined outside this hunk)
    }

    void RGB2HSV_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*RGB2HSV_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const RGB2HSV_caller_t RGB2HSV_callers[2][2] = 
+        static const RGB2HSV_caller_t RGB2HSV_callers[2][2] =
        {
            {RGB2HSV_caller<uchar, 3, 3>, RGB2HSV_caller<uchar, 3, 4>},
            {RGB2HSV_caller<uchar, 4, 3>, RGB2HSV_caller<uchar, 4, 4>}
@@ -1104,34 +1087,26 @@ namespace cv { namespace gpu { namespace color
    void RGB2HSV_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*RGB2HSV_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const RGB2HSV_caller_t RGB2HSV_callers[2][2] = 
+        static const RGB2HSV_caller_t RGB2HSV_callers[2][2] =
        {
            {RGB2HSV_caller<float, 3, 3>, RGB2HSV_caller<float, 3, 4>},
            {RGB2HSV_caller<float, 4, 3>, RGB2HSV_caller<float, 4, 4>}
        };
-        
+        // Row = srccn-3, column = dstcn-3. NOTE(review): no range check here - confirm callers only pass 3 or 4.
        RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
    }
-    
+
    template <typename T, int SRCCN, int DSTCN>
    void HSV2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
    {
-        if (hrange == 180)
-        {
-            HSV2RGB<SRCCN, DSTCN, 180, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
-        else
-        {
-            HSV2RGB<SRCCN, DSTCN, 255, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
+        HSV2RGB<SRCCN, DSTCN, T> cvt(bidx, hrange); // hrange is forwarded unchanged; no value is substituted for the non-180 case
+        callConvert(src, dst, cvt, stream); // runs the functor over src into dst on the given stream (callConvert is defined outside this hunk)
    }

    void HSV2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*HSV2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const HSV2RGB_caller_t HSV2RGB_callers[2][2] = 
+        static const HSV2RGB_caller_t HSV2RGB_callers[2][2] =
        {
            {HSV2RGB_caller<uchar, 3, 3>, HSV2RGB_caller<uchar, 3, 4>},
            {HSV2RGB_caller<uchar, 4, 3>, HSV2RGB_caller<uchar, 4, 4>}
@@ -1143,21 +1118,20 @@ namespace cv { namespace gpu { namespace color
    void HSV2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*HSV2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const HSV2RGB_caller_t HSV2RGB_callers[2][2] = 
+        static const HSV2RGB_caller_t HSV2RGB_callers[2][2] =
        {
            {HSV2RGB_caller<float, 3, 3>, HSV2RGB_caller<float, 3, 4>},
            {HSV2RGB_caller<float, 4, 3>, HSV2RGB_caller<float, 4, 4>}
        };
-        
+
        HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
    }

 /////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////

-    template <int HR, typename D>
-    __device__ void RGB2HLSConvert(const float* src, D& dst, int bidx)
+    template <typename D> __device__ void RGB2HLSConvert(const float* src, D& dst, int bidx, int hr)
    {
-        const float hscale = HR * (1.f/360.f);
+        const float hscale = hr * (1.f / 360.f);

        float b = src[bidx], g = src[1], r = src[bidx^2];
        float h = 0.f, s = 0.f, l;
@@ -1191,8 +1165,7 @@ namespace cv { namespace gpu { namespace color
        dst.y = l;
        dst.z = s;
    }
-    template <int HR, typename D>
-    __device__ void RGB2HLSConvert(const uchar* src, D& dst, int bidx)
+    template <typename D> __device__ void RGB2HLSConvert(const uchar* src, D& dst, int bidx, int hr)
    {
        float3 buf;

@@ -1200,40 +1173,40 @@ namespace cv { namespace gpu { namespace color
        buf.y = src[1]*(1.f/255.f);
        buf.z = src[2]*(1.f/255.f);

-        RGB2HLSConvert<HR>(&buf.x, buf, bidx);
+        RGB2HLSConvert(&buf.x, buf, bidx, hr);

        dst.x = saturate_cast<uchar>(buf.x);
        dst.y = saturate_cast<uchar>(buf.y*255.f);
        dst.z = saturate_cast<uchar>(buf.z*255.f);
    }

-    template <int SRCCN, int DSTCN, int HR, typename T> struct RGB2HLS
+    template <int SRCCN, int DSTCN, typename T> struct RGB2HLS
    {
        typedef typename TypeVec<T, SRCCN>::vec_t src_t;
        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

-        explicit RGB2HLS(int bidx) : bidx(bidx) {}
+        RGB2HLS(int bidx, int hr) : bidx(bidx), hr(hr) {}

        __device__ __forceinline__ dst_t operator()(const src_t& src) const
        {
            dst_t dst;
-            RGB2HLSConvert<HR>(&src.x, dst, bidx);
+            RGB2HLSConvert(&src.x, dst, bidx, hr);
            return dst;
        }

    private:
        int bidx;
+        int hr;
    };
-    
-    __constant__ int cHlsSectorData[6][3] = 
+
+    __constant__ int cHlsSectorData[6][3] =
    {
        {1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}
    };

-    template <int HR, typename T>
-    __device__ void HLS2RGBConvert(const T& src, float* dst, int bidx)
+    template <typename T> __device__ void HLS2RGBConvert(const T& src, float* dst, int bidx, int hr)
    {
-        const float hscale = 6.0f / HR;
+        const float hscale = 6.0f / hr;

        float h = src.x, l = src.y, s = src.z;
        float b, g, r;
@@ -1272,8 +1245,7 @@ namespace cv { namespace gpu { namespace color
        dst[1] = g;
        dst[bidx^2] = r;
    }
-    template <int HR, typename T>
-    __device__ void HLS2RGBConvert(const T& src, uchar* dst, int bidx)
+    template <typename T> __device__ void HLS2RGBConvert(const T& src, uchar* dst, int bidx, int hr)
    {
        float3 buf;

@@ -1281,51 +1253,44 @@ namespace cv { namespace gpu { namespace color
        buf.y = src.y*(1.f/255.f);
        buf.z = src.z*(1.f/255.f);

-        HLS2RGBConvert<HR>(buf, &buf.x, bidx);
+        HLS2RGBConvert(buf, &buf.x, bidx, hr);

        dst[0] = saturate_cast<uchar>(buf.x*255.f);
        dst[1] = saturate_cast<uchar>(buf.y*255.f);
        dst[2] = saturate_cast<uchar>(buf.z*255.f);
    }

-    template <int SRCCN, int DSTCN, int HR, typename T> struct HLS2RGB
+    template <int SRCCN, int DSTCN, typename T> struct HLS2RGB
    {
        typedef typename TypeVec<T, SRCCN>::vec_t src_t;
        typedef typename TypeVec<T, DSTCN>::vec_t dst_t;

-        explicit HLS2RGB(int bidx) : bidx(bidx) {}
+        HLS2RGB(int bidx, int hr) : bidx(bidx), hr(hr) {}

        __device__ __forceinline__ dst_t operator()(const src_t& src) const
        {
            dst_t dst;
-            HLS2RGBConvert<HR>(src, &dst.x, bidx);
+            HLS2RGBConvert(src, &dst.x, bidx, hr);
            setAlpha(dst, ColorChannel<T>::max());
            return dst;
        }

    private:
        int bidx;
+        int hr;
    };

    template <typename T, int SRCCN, int DSTCN>
    void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
    {
-        if (hrange == 180)
-        {
-            RGB2HLS<SRCCN, DSTCN, 180, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
-        else
-        {
-            RGB2HLS<SRCCN, DSTCN, 256, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
+        RGB2HLS<SRCCN, DSTCN, T> cvt(bidx, hrange);
+        callConvert(src, dst, cvt, stream);
    }

    void RGB2HLS_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*RGB2HLS_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const RGB2HLS_caller_t RGB2HLS_callers[2][2] = 
+        static const RGB2HLS_caller_t RGB2HLS_callers[2][2] =
        {
            {RGB2HLS_caller<uchar, 3, 3>, RGB2HLS_caller<uchar, 3, 4>},
            {RGB2HLS_caller<uchar, 4, 3>, RGB2HLS_caller<uchar, 4, 4>}
@@ -1337,35 +1302,27 @@ namespace cv { namespace gpu { namespace color
    void RGB2HLS_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*RGB2HLS_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const RGB2HLS_caller_t RGB2HLS_callers[2][2] = 
+        static const RGB2HLS_caller_t RGB2HLS_callers[2][2] =
        {
            {RGB2HLS_caller<float, 3, 3>, RGB2HLS_caller<float, 3, 4>},
            {RGB2HLS_caller<float, 4, 3>, RGB2HLS_caller<float, 4, 4>}
        };
-        
+
        RGB2HLS_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
    }

-    
+
    template <typename T, int SRCCN, int DSTCN>
    void HLS2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)
    {
-        if (hrange == 180)
-        {
-            HLS2RGB<SRCCN, DSTCN, 180, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
-        else
-        {
-            HLS2RGB<SRCCN, DSTCN, 255, T> cvt(bidx);
-            callConvert(src, dst, cvt, stream);
-        }
+        HLS2RGB<SRCCN, DSTCN, T> cvt(bidx, hrange);
+        callConvert(src, dst, cvt, stream);
    }

    void HLS2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*HLS2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const HLS2RGB_caller_t HLS2RGB_callers[2][2] = 
+        static const HLS2RGB_caller_t HLS2RGB_callers[2][2] =
        {
            {HLS2RGB_caller<uchar, 3, 3>, HLS2RGB_caller<uchar, 3, 4>},
            {HLS2RGB_caller<uchar, 4, 3>, HLS2RGB_caller<uchar, 4, 4>}
@@ -1377,12 +1334,12 @@ namespace cv { namespace gpu { namespace color
    void HLS2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, int hrange, cudaStream_t stream)
    {
        typedef void (*HLS2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream);
-        static const HLS2RGB_caller_t HLS2RGB_callers[2][2] = 
+        static const HLS2RGB_caller_t HLS2RGB_callers[2][2] =
        {
            {HLS2RGB_caller<float, 3, 3>, HLS2RGB_caller<float, 3, 4>},
            {HLS2RGB_caller<float, 4, 3>, HLS2RGB_caller<float, 4, 4>}
        };
-                
+
        HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);
    }
 }}}
--- a/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/limits_gpu.hpp
@@ -44,11 +44,11 @@
 #define __OPENCV_GPU_LIMITS_GPU_HPP__

 namespace cv { namespace gpu { namespace device
-{     
+{
    template<class T> struct numeric_limits_gpu
-    {	        
+    {
        typedef T type;
-        __device__ __forceinline__ static type min()  { return type(); };            
+        __device__ __forceinline__ static type min()  { return type(); };
        __device__ __forceinline__ static type max() { return type(); };
        __device__ __forceinline__ static type epsilon() { return type(); }
        __device__ __forceinline__ static type round_error() { return type(); }
@@ -60,9 +60,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<bool>
-    {	        
+    {
        typedef bool type;
-        __device__ __forceinline__ static type min() { return false; };            
+        __device__ __forceinline__ static type min() { return false; };
        __device__ __forceinline__ static type max() { return true;  };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -74,9 +74,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<char>
-    {	        
+    {
        typedef char type;
-        __device__ __forceinline__ static type min() { return CHAR_MIN; };            
+        __device__ __forceinline__ static type min() { return CHAR_MIN; };
        __device__ __forceinline__ static type max() { return CHAR_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -88,9 +88,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<unsigned char>
-    {	        
+    {
        typedef unsigned char type;
-        __device__ __forceinline__ static type min() { return 0; };            
+        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return UCHAR_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -102,9 +102,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<short>
-    {	
+    {
        typedef short type;
-        __device__ __forceinline__ static type min() { return SHRT_MIN; };            
+        __device__ __forceinline__ static type min() { return SHRT_MIN; };
        __device__ __forceinline__ static type max() { return SHRT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -116,9 +116,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<unsigned short>
-    {	        
+    {
        typedef unsigned short type;
-        __device__ __forceinline__ static type min() { return 0; };            
+        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return USHRT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -130,9 +130,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<int>
-    {	  
+    {
        typedef int type;
-        __device__ __forceinline__ static type min() { return INT_MIN; };            
+        __device__ __forceinline__ static type min() { return INT_MIN; };
        __device__ __forceinline__ static type max() { return INT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -145,9 +145,9 @@ namespace cv { namespace gpu { namespace device


    template<> struct numeric_limits_gpu<unsigned int>
-    {	        
+    {
        typedef unsigned int type;
-        __device__ __forceinline__ static type min() { return 0; };            
+        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return UINT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -159,9 +159,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<long>
-    {	        
+    {
        typedef long type;
-        __device__ __forceinline__ static type min() { return LONG_MIN; };            
+        __device__ __forceinline__ static type min() { return LONG_MIN; };
        __device__ __forceinline__ static type max() { return LONG_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -173,9 +173,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<unsigned long>
-    {	        
+    {
        typedef unsigned long type;
-        __device__ __forceinline__ static type min() { return 0; };            
+        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return ULONG_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -185,11 +185,11 @@ namespace cv { namespace gpu { namespace device
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = false;
    };
-                
+
    template<> struct numeric_limits_gpu<float>
-    {	        
+    {
        typedef float type;
-        __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };            
+        __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
        __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
        __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
        __device__ __forceinline__ static type round_error();
@@ -201,9 +201,9 @@ namespace cv { namespace gpu { namespace device
    };

    template<> struct numeric_limits_gpu<double>
-    {	        
+    {
        typedef double type;
-        __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };            
+        __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
        __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
@@ -212,7 +212,7 @@ namespace cv { namespace gpu { namespace device
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
-    };            
+    };
 }}}

 #endif // __OPENCV_GPU_LIMITS_GPU_HPP__
--- a/modules/gpu/test/test_imgproc_gpu.cpp
+++ b/modules/gpu/test/test_imgproc_gpu.cpp
@@ -461,17 +461,17 @@ protected:

 int CV_GpuCvtColorTest::CheckNorm(const Mat& m1, const Mat& m2)
 {
-    double ret = norm(m1, m2, NORM_INF);
+    float max_err = 1e-2f;

-    if (ret <= 3)
-    {
-        return cvtest::TS::OK;
-    }
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "\nNorm: %f\n", ret);
-        return cvtest::TS::FAIL_GENERIC;
-    }
+    Mat diff;
+    cv::matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
+
+    float err = abs(diff.at<float>(0, 0) - 1.f);
+
+    if (err > max_err)
+        return cvtest::TS::FAIL_INVALID_OUTPUT;
+
+    return cvtest::TS::OK;
 }

 void CV_GpuCvtColorTest::run( int )
@@ -596,7 +596,7 @@ void CV_GpuHistogramsTest::run( int )
 ////////////////////////////////////////////////////////////////////////
 // Corner Harris feature detector

-struct CV_GpuCornerHarrisTest: cvtest::BaseTest 
+struct CV_GpuCornerHarrisTest: cvtest::BaseTest
 {
    CV_GpuCornerHarrisTest() {}

@@ -616,7 +616,7 @@ struct CV_GpuCornerHarrisTest: cvtest::BaseTest
    {
        RNG rng;
        cv::Mat src(rows, cols, depth);
-        if (depth == CV_32F) 
+        if (depth == CV_32F)
            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
        else if (depth == CV_8U)
            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
@@ -629,7 +629,7 @@ struct CV_GpuCornerHarrisTest: cvtest::BaseTest
        int borderType;

        borderType = BORDER_REFLECT101;
-        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); 
+        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);

        dsth = dst;
@@ -639,7 +639,7 @@ struct CV_GpuCornerHarrisTest: cvtest::BaseTest
            {
                float a = dst_gold.at<float>(i, j);
                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-3f) 
+                if (fabs(a - b) > 1e-3f)
                {
                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize);
                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
@@ -649,7 +649,7 @@ struct CV_GpuCornerHarrisTest: cvtest::BaseTest
        }

        borderType = BORDER_REPLICATE;
-        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); 
+        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);

        dsth = dst;
@@ -659,7 +659,7 @@ struct CV_GpuCornerHarrisTest: cvtest::BaseTest
            {
                float a = dst_gold.at<float>(i, j);
                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-3f) 
+                if (fabs(a - b) > 1e-3f)
                {
                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize);
                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
@@ -674,7 +674,7 @@ struct CV_GpuCornerHarrisTest: cvtest::BaseTest
 ////////////////////////////////////////////////////////////////////////
 // Corner Min Eigen Val

-struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest 
+struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
 {
    CV_GpuCornerMinEigenValTest() {}

@@ -694,7 +694,7 @@ struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
    {
        RNG rng;
        cv::Mat src(rows, cols, depth);
-        if (depth == CV_32F) 
+        if (depth == CV_32F)
            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
        else if (depth == CV_8U)
            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
@@ -706,8 +706,8 @@ struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
        int borderType;

        borderType = BORDER_REFLECT101;
-        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); 
-        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);      
+        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
+        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);

        dsth = dst;
        for (int i = 0; i < dst.rows; ++i)
@@ -716,7 +716,7 @@ struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
            {
                float a = dst_gold.at<float>(i, j);
                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-2f) 
+                if (fabs(a - b) > 1e-2f)
                {
                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
@@ -726,8 +726,8 @@ struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
        }

        borderType = BORDER_REPLICATE;
-        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); 
-        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);      
+        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
+        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);

        dsth = dst;
        for (int i = 0; i < dst.rows; ++i)
@@ -736,7 +736,7 @@ struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
            {
                float a = dst_gold.at<float>(i, j);
                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-2f) 
+                if (fabs(a - b) > 1e-2f)
                {
                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
@@ -749,7 +749,7 @@ struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
    }
 };

-struct CV_GpuColumnSumTest: cvtest::BaseTest 
+struct CV_GpuColumnSumTest: cvtest::BaseTest
 {
    CV_GpuColumnSumTest() {}

@@ -794,7 +794,7 @@ struct CV_GpuColumnSumTest: cvtest::BaseTest
    }
 };

-struct CV_GpuNormTest : cvtest::BaseTest 
+struct CV_GpuNormTest : cvtest::BaseTest
 {
    CV_GpuNormTest() {}

@@ -924,12 +924,12 @@ TEST(downsample, accuracy_on_8U)
    for (int k = 2; k <= 5; ++k)
    {
        GpuMat d_dst;
-        downsample(GpuMat(src), d_dst, k);       
+        downsample(GpuMat(src), d_dst, k);

        Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k);
-        ASSERT_EQ(dst_gold_size.width, d_dst.cols) 
+        ASSERT_EQ(dst_gold_size.width, d_dst.cols)
            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-        ASSERT_EQ(dst_gold_size.height, d_dst.rows) 
+        ASSERT_EQ(dst_gold_size.height, d_dst.rows)
            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;

        Mat dst = d_dst;
@@ -949,12 +949,12 @@ TEST(downsample, accuracy_on_32F)
    for (int k = 2; k <= 5; ++k)
    {
        GpuMat d_dst;
-        downsample(GpuMat(src), d_dst, k);       
+        downsample(GpuMat(src), d_dst, k);

        Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k);
-        ASSERT_EQ(dst_gold_size.width, d_dst.cols) 
+        ASSERT_EQ(dst_gold_size.width, d_dst.cols)
            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-        ASSERT_EQ(dst_gold_size.height, d_dst.rows) 
+        ASSERT_EQ(dst_gold_size.height, d_dst.rows)
            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;

        Mat dst = d_dst;