Normalize line endings and whitespace

committed by Andrey Kamaev
parent 0442bca235
commit 81f826db2b
@@ -44,7 +44,7 @@ if (HAVE_CUDA)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler /wd4251)
    endif()
  endif()

  ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})

  #CUDA_BUILD_CLEAN_TARGET()
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,221 +1,221 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_COLOR_HPP__
#define __OPENCV_GPU_COLOR_HPP__

#include "detail/color_detail.hpp"

namespace cv { namespace gpu { namespace device
{
    // All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implement
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
    // {
    //     typedef ... functor_type;
    //     static __host__ __device__ functor_type create_functor();
    // };
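    // Illustrative usage sketch (added commentary, not part of the original
    // header): a generated traits class is consumed by instantiating it for a
    // channel type and asking it for its functor. The uchar instantiation and
    // variable names below are assumptions made for this sketch only.
    //
    //     typedef bgr_to_rgb_traits<uchar>::functor_type ConvOp;
    //     ConvOp cvt = bgr_to_rgb_traits<uchar>::create_functor();
    //     uchar3 rgb = cvt(bgr_pixel);   // bgr_pixel: a uchar3 in BGR channel order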
    OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)

    #undef OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
    OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)

    #undef OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
    OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)

    #undef OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
    OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)

    #undef OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
    OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)

    #undef OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
    OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)

    #undef OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 2)

    #undef OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS

    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 0)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 2)

    #undef OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS

    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS

    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS

    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS

    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS

    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
    OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)

    #undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_COLOR_HPP__
@@ -1,114 +1,114 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_COMMON_HPP__
#define __OPENCV_GPU_COMMON_HPP__

#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"

#ifndef CV_PI
    #define CV_PI 3.1415926535897932384626433832795
#endif

#ifndef CV_PI_F
    #ifndef CV_PI
        #define CV_PI_F 3.14159265f
    #else
        #define CV_PI_F ((float)CV_PI)
    #endif
#endif

#if defined(__GNUC__)
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
#endif

namespace cv { namespace gpu
{
    void error(const char *error_string, const char *file, const int line, const char *func);

    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        return reinterpret_cast<size_t>(ptr) % size == 0;
    }

    static inline bool isAligned(size_t step, size_t size)
    {
        return step % size == 0;
    }
}}

static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
    if (cudaSuccess != err)
        cv::gpu::error(cudaGetErrorString(err), file, line, func);
}

#ifdef __CUDACC__

namespace cv { namespace gpu
{
    __host__ __device__ __forceinline__ int divUp(int total, int grain)
    {
        return (total + grain - 1) / grain;
    }

    namespace device
    {
        typedef unsigned char uchar;
        typedef unsigned short ushort;
        typedef signed char schar;
        typedef unsigned int uint;

        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }
    }
}}

#endif // __CUDACC__

#endif // __OPENCV_GPU_COMMON_HPP__
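// Illustrative example (added for this write-up, not part of the original
// header): how divUp() and cudaSafeCall() are meant to compose when sizing and
// checking a kernel launch. `scaleKernel`, `scaleImage`, and the 16x16 block
// shape are assumptions made for this sketch only.
#ifdef __CUDACC__
__global__ void scaleKernel(cv::gpu::PtrStepSz<float> img, float s)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < img.cols && y < img.rows)
        img(y, x) *= s;   // PtrStepSz gives 2D indexing over pitched device memory
}

inline void scaleImage(cv::gpu::PtrStepSz<float> img, float s)
{
    const dim3 block(16, 16);
    // divUp rounds the grid up so every pixel is covered by a thread
    const dim3 grid(cv::gpu::divUp(img.cols, block.x), cv::gpu::divUp(img.rows, block.y));

    scaleKernel<<<grid, block>>>(img, s);
    cudaSafeCall( cudaGetLastError() );        // catches launch-configuration errors
    cudaSafeCall( cudaDeviceSynchronize() );   // catches asynchronous execution errors
}
#endif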
@@ -1,105 +1,105 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__

#include "common.hpp"

namespace cv { namespace gpu { namespace device
{
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200

        // for Fermi, memory space is detected automatically
        template <typename T> struct ForceGlob
        {
            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
        };

    #else // __CUDA_ARCH__ >= 200

        #if defined(_WIN64) || defined(__LP64__)
            // 64-bit register modifier for inlined asm
            #define OPENCV_GPU_ASM_PTR "l"
        #else
            // 32-bit register modifier for inlined asm
            #define OPENCV_GPU_ASM_PTR "r"
        #endif

        template<class T> struct ForceGlob;

        #define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
            template <> struct ForceGlob<base_type> \
            { \
                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
                { \
                    asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
                } \
            };

        #define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
            template <> struct ForceGlob<base_type> \
            { \
                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
                { \
                    asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
                } \
            };

        OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar,  u8)
        OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar,  s8)
        OPENCV_GPU_DEFINE_FORCE_GLOB_B(char,   b8)
        OPENCV_GPU_DEFINE_FORCE_GLOB  (ushort, u16, h)
        OPENCV_GPU_DEFINE_FORCE_GLOB  (short,  s16, h)
        OPENCV_GPU_DEFINE_FORCE_GLOB  (uint,   u32, r)
        OPENCV_GPU_DEFINE_FORCE_GLOB  (int,    s32, r)
        OPENCV_GPU_DEFINE_FORCE_GLOB  (float,  f32, f)
        OPENCV_GPU_DEFINE_FORCE_GLOB  (double, f64, d)

        #undef OPENCV_GPU_DEFINE_FORCE_GLOB
        #undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
        #undef OPENCV_GPU_ASM_PTR

    #endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
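// Illustrative sketch (added commentary, not part of the original header):
// device code uses ForceGlob<T>::Load to force a load through the global
// memory path on pre-Fermi parts, where the compiler cannot infer the memory
// space itself. `copyKernel` below is a hypothetical example, not repository
// code; it assumes datamov_utils.hpp is included.
__global__ void copyKernel(const float* src, float* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        float v;
        cv::gpu::device::ForceGlob<float>::Load(src, i, v); // v = src[i], forced global load
        dst[i] = v;
    }
}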
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,395 +1,395 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
#define __OPENCV_GPU_TRANSFORM_DETAIL_HPP__

#include "../common.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace transform_detail
    {
        //! Read Write Traits

        template <typename T, typename D, int shift> struct UnaryReadWriteTraits
        {
            typedef typename TypeVec<T, shift>::vec_type read_type;
            typedef typename TypeVec<D, shift>::vec_type write_type;
        };

        template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
        {
            typedef typename TypeVec<T1, shift>::vec_type read_type1;
            typedef typename TypeVec<T2, shift>::vec_type read_type2;
            typedef typename TypeVec<D, shift>::vec_type write_type;
        };

        //! Transform kernels

        template <int shift> struct OpUnroller;
        template <> struct OpUnroller<1>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
            }
        };
        template <> struct OpUnroller<2>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
            }
        };
        template <> struct OpUnroller<3>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
            }
        };
        template <> struct OpUnroller<4>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
                if (mask(y, x_shifted + 3))
                    dst.w = op(src.w);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
                if (mask(y, x_shifted + 3))
                    dst.w = op(src1.w, src2.w);
            }
        };
        template <> struct OpUnroller<8>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.a0 = op(src.a0);
                if (mask(y, x_shifted + 1))
                    dst.a1 = op(src.a1);
                if (mask(y, x_shifted + 2))
                    dst.a2 = op(src.a2);
                if (mask(y, x_shifted + 3))
                    dst.a3 = op(src.a3);
                if (mask(y, x_shifted + 4))
                    dst.a4 = op(src.a4);
                if (mask(y, x_shifted + 5))
                    dst.a5 = op(src.a5);
                if (mask(y, x_shifted + 6))
                    dst.a6 = op(src.a6);
                if (mask(y, x_shifted + 7))
                    dst.a7 = op(src.a7);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.a0 = op(src1.a0, src2.a0);
                if (mask(y, x_shifted + 1))
                    dst.a1 = op(src1.a1, src2.a1);
                if (mask(y, x_shifted + 2))
                    dst.a2 = op(src1.a2, src2.a2);
                if (mask(y, x_shifted + 3))
                    dst.a3 = op(src1.a3, src2.a3);
                if (mask(y, x_shifted + 4))
                    dst.a4 = op(src1.a4, src2.a4);
                if (mask(y, x_shifted + 5))
                    dst.a5 = op(src1.a5, src2.a5);
                if (mask(y, x_shifted + 6))
                    dst.a6 = op(src1.a6, src2.a6);
                if (mask(y, x_shifted + 7))
                    dst.a7 = op(src1.a7, src2.a7);
            }
        };
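        // Added commentary (not in the original source): transformSmart below
        // loads ft::smart_shift consecutive elements per thread as a single
        // vector-typed read, applies `op` element-wise via OpUnroller with a
        // per-element mask test, and writes the result back as one vector
        // store. Threads whose vector would overrun the row fall back to the
        // scalar tail loop in the `else` branch.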
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
|
||||
{
|
||||
typedef TransformFunctorTraits<UnOp> ft;
|
||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
|
||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * ft::smart_shift;
|
||||
|
||||
if (y < src_.rows)
|
||||
{
|
||||
const T* src = src_.ptr(y);
|
||||
D* dst = dst_.ptr(y);
|
||||
|
||||
if (x_shifted + ft::smart_shift - 1 < src_.cols)
|
||||
{
|
||||
const read_type src_n_el = ((const read_type*)src)[x];
|
||||
write_type dst_n_el = ((const write_type*)dst)[x];
|
||||
|
||||
OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
|
||||
|
||||
((write_type*)dst)[x] = dst_n_el;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
|
||||
{
|
||||
if (mask(y, real_x))
|
||||
dst[real_x] = op(src[real_x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < src.cols && y < src.rows && mask(y, x))
|
||||
{
|
||||
dst.ptr(y)[x] = op(src.ptr(y)[x]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
|
||||
const Mask mask, const BinOp op)
|
||||
{
|
||||
typedef TransformFunctorTraits<BinOp> ft;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
|
||||
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
|
||||
|
||||
const int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||
const int x_shifted = x * ft::smart_shift;
|
||||
|
||||
            if (y < src1_.rows)
            {
                const T1* src1 = src1_.ptr(y);
                const T2* src2 = src2_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + ft::smart_shift - 1 < src1_.cols)
                {
                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];
                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];
                    write_type dst_n_el = ((const write_type*)dst)[x];

                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);

                    ((write_type*)dst)[x] = dst_n_el;
                }
                else
                {
                    for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src1[real_x], src2[real_x]);
                    }
                }
            }
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
                                               const Mask mask, const BinOp op)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < src1.cols && y < src1.rows && mask(y, x))
            {
                const T1 src1_data = src1.ptr(y)[x];
                const T2 src2_data = src2.ptr(y)[x];
                dst.ptr(y)[x] = op(src1_data, src2_data);
            }
        }

        template <bool UseSmart> struct TransformDispatcher;
        template<> struct TransformDispatcher<false>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<UnOp> ft;

                const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
                const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

                transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<BinOp> ft;

                const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
                const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

                transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
        template<> struct TransformDispatcher<true>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<UnOp> ft;

                StaticAssert<ft::smart_shift != 1>::check();

                if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
                {
                    TransformDispatcher<false>::call(src, dst, op, mask, stream);
                    return;
                }

                const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
                const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

                transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<BinOp> ft;

                StaticAssert<ft::smart_shift != 1>::check();

                if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
                    !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
                {
                    TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
                    return;
                }

                const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
                const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

                transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
    } // namespace transform_detail
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
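For context only (this note and the sketch below are not part of the diff): the dispatcher above is driven by a thin wrapper in transform.hpp, roughly as follows. The wrapper name transform_caller is an assumption for illustration; the selection logic — take the vectorized "smart" path only for single-channel types whose functor traits allow a smart_shift greater than 1 — is what the code above is built around.

// Hedged caller-side sketch (assumed names; not part of this commit).
template <typename T, typename D, typename UnOp, typename Mask>
static inline void transform_caller(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
    typedef TransformFunctorTraits<UnOp> ft;

    // Single-channel input/output plus a usable smart_shift selects
    // TransformDispatcher<true>; everything else takes the simple path.
    transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
}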
@@ -1,187 +1,187 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
#define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__

#include "../common.hpp"
#include "../vec_traits.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace type_traits_detail
    {
        template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
        template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };

        template <typename T> struct IsSignedIntergral { enum {value = 0}; };
        template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
        template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
        template <> struct IsSignedIntergral<short> { enum {value = 1}; };
        template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
        template <> struct IsSignedIntergral<int> { enum {value = 1}; };
        template <> struct IsSignedIntergral<int1> { enum {value = 1}; };

        template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
        template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
        template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };

        template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
        template <> struct IsIntegral<char> { enum {value = 1}; };
        template <> struct IsIntegral<bool> { enum {value = 1}; };

        template <typename T> struct IsFloat { enum {value = 0}; };
        template <> struct IsFloat<float> { enum {value = 1}; };
        template <> struct IsFloat<double> { enum {value = 1}; };

        template <typename T> struct IsVec { enum {value = 0}; };
        template <> struct IsVec<uchar1> { enum {value = 1}; };
        template <> struct IsVec<uchar2> { enum {value = 1}; };
        template <> struct IsVec<uchar3> { enum {value = 1}; };
        template <> struct IsVec<uchar4> { enum {value = 1}; };
        template <> struct IsVec<uchar8> { enum {value = 1}; };
        template <> struct IsVec<char1> { enum {value = 1}; };
        template <> struct IsVec<char2> { enum {value = 1}; };
        template <> struct IsVec<char3> { enum {value = 1}; };
        template <> struct IsVec<char4> { enum {value = 1}; };
        template <> struct IsVec<char8> { enum {value = 1}; };
        template <> struct IsVec<ushort1> { enum {value = 1}; };
        template <> struct IsVec<ushort2> { enum {value = 1}; };
        template <> struct IsVec<ushort3> { enum {value = 1}; };
        template <> struct IsVec<ushort4> { enum {value = 1}; };
        template <> struct IsVec<ushort8> { enum {value = 1}; };
        template <> struct IsVec<short1> { enum {value = 1}; };
        template <> struct IsVec<short2> { enum {value = 1}; };
        template <> struct IsVec<short3> { enum {value = 1}; };
        template <> struct IsVec<short4> { enum {value = 1}; };
        template <> struct IsVec<short8> { enum {value = 1}; };
        template <> struct IsVec<uint1> { enum {value = 1}; };
        template <> struct IsVec<uint2> { enum {value = 1}; };
        template <> struct IsVec<uint3> { enum {value = 1}; };
        template <> struct IsVec<uint4> { enum {value = 1}; };
        template <> struct IsVec<uint8> { enum {value = 1}; };
        template <> struct IsVec<int1> { enum {value = 1}; };
        template <> struct IsVec<int2> { enum {value = 1}; };
        template <> struct IsVec<int3> { enum {value = 1}; };
        template <> struct IsVec<int4> { enum {value = 1}; };
        template <> struct IsVec<int8> { enum {value = 1}; };
        template <> struct IsVec<float1> { enum {value = 1}; };
        template <> struct IsVec<float2> { enum {value = 1}; };
        template <> struct IsVec<float3> { enum {value = 1}; };
        template <> struct IsVec<float4> { enum {value = 1}; };
        template <> struct IsVec<float8> { enum {value = 1}; };
        template <> struct IsVec<double1> { enum {value = 1}; };
        template <> struct IsVec<double2> { enum {value = 1}; };
        template <> struct IsVec<double3> { enum {value = 1}; };
        template <> struct IsVec<double4> { enum {value = 1}; };
        template <> struct IsVec<double8> { enum {value = 1}; };

        template <class U> struct AddParameterType { typedef const U& type; };
        template <class U> struct AddParameterType<U&> { typedef U& type; };
        template <> struct AddParameterType<void> { typedef void type; };

        template <class U> struct ReferenceTraits
        {
            enum { value = false };
            typedef U type;
        };
        template <class U> struct ReferenceTraits<U&>
        {
            enum { value = true };
            typedef U type;
        };

        template <class U> struct PointerTraits
        {
            enum { value = false };
            typedef void type;
        };
        template <class U> struct PointerTraits<U*>
        {
            enum { value = true };
            typedef U type;
        };
        template <class U> struct PointerTraits<U*&>
        {
            enum { value = true };
            typedef U type;
        };

        template <class U> struct UnConst
        {
            typedef U type;
            enum { value = 0 };
        };
        template <class U> struct UnConst<const U>
        {
            typedef U type;
            enum { value = 1 };
        };
        template <class U> struct UnConst<const U&>
        {
            typedef U& type;
            enum { value = 1 };
        };

        template <class U> struct UnVolatile
        {
            typedef U type;
            enum { value = 0 };
        };
        template <class U> struct UnVolatile<volatile U>
        {
            typedef U type;
            enum { value = 1 };
        };
        template <class U> struct UnVolatile<volatile U&>
        {
            typedef U& type;
            enum { value = 1 };
        };
    } // namespace type_traits_detail
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
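A quick illustration (not part of the diff) of how these selectors compose at compile time; the typedef name Result is invented for the example.

// Select picks its second alternative here because IsIntegral<float>::value
// is 0, so Result resolves to float.
typedef cv::gpu::device::type_traits_detail::Select<
    cv::gpu::device::type_traits_detail::IsIntegral<float>::value != 0,
    int, float>::type Result;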
@@ -1,117 +1,117 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__

#include "../datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace vec_distance_detail
    {
        template <int THREAD_DIM, int N> struct UnrollVecDiffCached
        {
            template <typename Dist, typename T1, typename T2>
            static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
            {
                if (ind < len)
                {
                    T1 val1 = *vecCached++;

                    T2 val2;
                    ForceGlob<T2>::Load(vecGlob, ind, val2);

                    dist.reduceIter(val1, val2);

                    UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
                }
            }

            template <typename Dist, typename T1, typename T2>
            static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
            {
                T1 val1 = *vecCached++;

                T2 val2;
                ForceGlob<T2>::Load(vecGlob, 0, val2);
                vecGlob += THREAD_DIM;

                dist.reduceIter(val1, val2);

                UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
            }
        };
        template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
        {
            template <typename Dist, typename T1, typename T2>
            static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
            {
            }

            template <typename Dist, typename T1, typename T2>
            static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
            {
            }
        };

        template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
        template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
        {
            template <typename Dist, typename T1, typename T2>
            static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
            {
                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
            }
        };
        template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
        {
            template <typename Dist, typename T1, typename T2>
            static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
            {
                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
            }
        };
    } // namespace vec_distance_detail
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
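For orientation (illustrative only, not part of the diff): the Dist template parameter needs only a reduceIter member for these unrolled loops to compile; a squared-L2 accumulator would look roughly like the sketch below. The struct name is invented, and the real distance classes in vec_distance.hpp also carry a block-wide reduction step that is omitted here.

// Minimal Dist-like accumulator compatible with calcCheck/calcWithoutCheck.
struct L2DistSketch
{
    float dist;

    __device__ __forceinline__ L2DistSketch() : dist(0.0f) {}

    __device__ __forceinline__ void reduceIter(float val1, float val2)
    {
        const float d = val1 - val2;
        dist += d * d; // per-thread accumulation of squared differences
    }
};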
@@ -1,80 +1,80 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__

namespace cv { namespace gpu { namespace device
{
    template<class T> struct DynamicSharedMem
    {
        __device__ __forceinline__ operator T*()
        {
            extern __shared__ int __smem[];
            return (T*)__smem;
        }

        __device__ __forceinline__ operator const T*() const
        {
            extern __shared__ int __smem[];
            return (T*)__smem;
        }
    };

    // specialize for double to avoid unaligned memory access compile errors
    template<> struct DynamicSharedMem<double>
    {
        __device__ __forceinline__ operator double*()
        {
            extern __shared__ double __smem_d[];
            return (double*)__smem_d;
        }

        __device__ __forceinline__ operator const double*() const
        {
            extern __shared__ double __smem_d[];
            return (double*)__smem_d;
        }
    };
}}}

#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
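Typical use (an illustrative sketch, not part of the diff; the kernel and buffer names are invented): the conversion operator maps the kernel's dynamically sized shared-memory allocation onto a typed pointer, so the launch must pass a large enough third <<<>>> argument.

// Launched as: copyViaSharedMem<<<grid, block, block.x * sizeof(T)>>>(in, out, n);
template <typename T>
__global__ void copyViaSharedMem(const T* in, T* out, int n)
{
    cv::gpu::device::DynamicSharedMem<T> smem;
    T* buf = smem; // operator T*() returns the extern __shared__ block

    const int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < n)
        buf[threadIdx.x] = in[i]; // stage through shared memory
    __syncthreads();
    if (i < n)
        out[i] = buf[threadIdx.x];
}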
@@ -1,278 +1,278 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_GPU_FILTERS_HPP__
|
||||
#define __OPENCV_GPU_FILTERS_HPP__
|
||||
|
||||
#include "saturate_cast.hpp"
|
||||
#include "vec_traits.hpp"
|
||||
#include "vec_math.hpp"
|
||||
#include "type_traits.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename Ptr2D> struct PointFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||
: src(src_)
|
||||
{
|
||||
(void)fx;
|
||||
(void)fy;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
return src(__float2int_rz(y), __float2int_rz(x));
|
||||
}
|
||||
|
||||
const Ptr2D src;
|
||||
};

    template <typename Ptr2D> struct LinearFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
        typedef float index_type;

        explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
        : src(src_)
        {
            (void)fx;
            (void)fy;
        }

        __device__ __forceinline__ elem_type operator ()(float y, float x) const
        {
            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

            work_type out = VecTraits<work_type>::all(0);

            const int x1 = __float2int_rd(x);
            const int y1 = __float2int_rd(y);
            const int x2 = x1 + 1;
            const int y2 = y1 + 1;

            elem_type src_reg = src(y1, x1);
            out = out + src_reg * ((x2 - x) * (y2 - y));

            src_reg = src(y1, x2);
            out = out + src_reg * ((x - x1) * (y2 - y));

            src_reg = src(y2, x1);
            out = out + src_reg * ((x2 - x) * (y - y1));

            src_reg = src(y2, x2);
            out = out + src_reg * ((x - x1) * (y - y1));

            return saturate_cast<elem_type>(out);
        }

        const Ptr2D src;
    };
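
LinearFilter is bilinear interpolation: each of the four neighbours is weighted by the area of the sub-rectangle opposite to it, and since (x2 - x) + (x - x1) = 1 and (y2 - y) + (y - y1) = 1, the four weights always sum to 1. A tiny host-side check (plain C++, self-contained, not part of the header):

// Editor's sketch: verify that the four bilinear weights sum to 1.
#include <cstdio>
#include <cmath>

int main()
{
    float x = 3.7f, y = 1.2f;
    float x1 = std::floor(x), y1 = std::floor(y);
    float x2 = x1 + 1, y2 = y1 + 1;

    float w11 = (x2 - x) * (y2 - y); // weight of src(y1, x1)
    float w12 = (x - x1) * (y2 - y); // weight of src(y1, x2)
    float w21 = (x2 - x) * (y - y1); // weight of src(y2, x1)
    float w22 = (x - x1) * (y - y1); // weight of src(y2, x2)

    printf("sum = %f\n", w11 + w12 + w21 + w22); // prints sum = 1.000000
    return 0;
}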

    template <typename Ptr2D> struct CubicFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
        typedef float index_type;
        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
        : src(src_)
        {
            (void)fx;
            (void)fy;
        }

        static __device__ __forceinline__ float bicubicCoeff(float x_)
        {
            float x = fabsf(x_);
            if (x <= 1.0f)
            {
                return x * x * (1.5f * x - 2.5f) + 1.0f;
            }
            else if (x < 2.0f)
            {
                return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
            }
            else
            {
                return 0.0f;
            }
        }

        __device__ elem_type operator ()(float y, float x) const
        {
            const float xmin = ::ceilf(x - 2.0f);
            const float xmax = ::floorf(x + 2.0f);

            const float ymin = ::ceilf(y - 2.0f);
            const float ymax = ::floorf(y + 2.0f);

            work_type sum = VecTraits<work_type>::all(0);
            float wsum = 0.0f;

            for (float cy = ymin; cy <= ymax; cy += 1.0f)
            {
                for (float cx = xmin; cx <= xmax; cx += 1.0f)
                {
                    const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
                    sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
                    wsum += w;
                }
            }

            work_type res = (!wsum) ? VecTraits<work_type>::all(0) : sum / wsum;

            return saturate_cast<elem_type>(res);
        }

        const Ptr2D src;
    };
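
bicubicCoeff is the Keys cubic convolution kernel with a = -0.5 (Catmull-Rom): (a+2)|x|^3 - (a+3)|x|^2 + 1 for |x| <= 1 and a|x|^3 - 5a|x|^2 + 8a|x| - 4a for 1 < |x| < 2, written in Horner form. At interior pixels the 4x4 tap weights sum to exactly 1; near borders the ceilf/floorf window is clipped, which is why operator() normalizes by wsum. A quick host-side check of the unit-sum property (plain C++, not part of the header):

// Editor's sketch: at an interior position the 4 taps of the Catmull-Rom
// kernel sum to 1, so the wsum normalization is a no-op away from borders.
#include <cstdio>
#include <cmath>

static float bicubicCoeff(float x_)
{
    float x = std::fabs(x_);
    if (x <= 1.0f)     return x * x * (1.5f * x - 2.5f) + 1.0f;
    else if (x < 2.0f) return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
    else               return 0.0f;
}

int main()
{
    float x = 5.3f;                  // arbitrary interior coordinate
    float wsum = 0.0f;
    for (float cx = std::ceil(x - 2.0f); cx <= std::floor(x + 2.0f); cx += 1.0f)
        wsum += bicubicCoeff(x - cx);
    printf("wsum = %f\n", wsum);     // prints wsum = 1.000000
    return 0;
}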

    // for integer scaling
    template <typename Ptr2D> struct IntegerAreaFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
        typedef float index_type;

        explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
            : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}

        __device__ __forceinline__ elem_type operator ()(float y, float x) const
        {
            float fsx1 = x * scale_x;
            float fsx2 = fsx1 + scale_x;

            int sx1 = __float2int_ru(fsx1);
            int sx2 = __float2int_rd(fsx2);

            float fsy1 = y * scale_y;
            float fsy2 = fsy1 + scale_y;

            int sy1 = __float2int_ru(fsy1);
            int sy2 = __float2int_rd(fsy2);

            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
            work_type out = VecTraits<work_type>::all(0.f);

            for (int dy = sy1; dy < sy2; ++dy)
                for (int dx = sx1; dx < sx2; ++dx)
                {
                    out = out + src(dy, dx) * scale;
                }

            return saturate_cast<elem_type>(out);
        }

        const Ptr2D src;
        float scale_x, scale_y, scale;
    };
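
IntegerAreaFilter is a box filter for integer downscale factors: destination pixel (x, y) covers the source rectangle [x*scale_x, (x+1)*scale_x) x [y*scale_y, (y+1)*scale_y), and every covered source pixel contributes with the same weight 1/(scale_x * scale_y). With scale_x = scale_y = 2, for instance, each output pixel is the mean of a 2x2 block. A host-side analogue (plain C++, hypothetical grayscale buffers, not part of the header):

// Editor's sketch: 2x integer-area downscale of a grayscale buffer.
#include <vector>

std::vector<float> downscale2x(const std::vector<float>& src, int w, int h)
{
    std::vector<float> dst((w / 2) * (h / 2));
    const float scale = 1.0f / 4.0f;                    // 1 / (scale_x * scale_y)
    for (int y = 0; y < h / 2; ++y)
        for (int x = 0; x < w / 2; ++x)
        {
            float out = 0.f;
            for (int dy = 2 * y; dy < 2 * y + 2; ++dy)  // sy1 .. sy2
                for (int dx = 2 * x; dx < 2 * x + 2; ++dx)
                    out += src[dy * w + dx] * scale;
            dst[y * (w / 2) + x] = out;
        }
    return dst;
}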

    template <typename Ptr2D> struct AreaFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
        typedef float index_type;

        explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
            : src(src_), scale_x(scale_x_), scale_y(scale_y_) {}

        __device__ __forceinline__ elem_type operator ()(float y, float x) const
        {
            float fsx1 = x * scale_x;
            float fsx2 = fsx1 + scale_x;

            int sx1 = __float2int_ru(fsx1);
            int sx2 = __float2int_rd(fsx2);

            float fsy1 = y * scale_y;
            float fsy2 = fsy1 + scale_y;

            int sy1 = __float2int_ru(fsy1);
            int sy2 = __float2int_rd(fsy2);

            float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));

            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
            work_type out = VecTraits<work_type>::all(0.f);

            for (int dy = sy1; dy < sy2; ++dy)
            {
                for (int dx = sx1; dx < sx2; ++dx)
                    out = out + src(dy, dx) * scale;

                if (sx1 > fsx1)
                    out = out + src(dy, (sx1 - 1)) * ((sx1 - fsx1) * scale);

                if (sx2 < fsx2)
                    out = out + src(dy, sx2) * ((fsx2 - sx2) * scale);
            }

            if (sy1 > fsy1)
                for (int dx = sx1; dx < sx2; ++dx)
                    out = out + src((sy1 - 1), dx) * ((sy1 - fsy1) * scale);

            if (sy2 < fsy2)
                for (int dx = sx1; dx < sx2; ++dx)
                    out = out + src(sy2, dx) * ((fsy2 - sy2) * scale);

            if ((sy1 > fsy1) && (sx1 > fsx1))
                out = out + src((sy1 - 1), (sx1 - 1)) * ((sy1 - fsy1) * (sx1 - fsx1) * scale);

            if ((sy1 > fsy1) && (sx2 < fsx2))
                out = out + src((sy1 - 1), sx2) * ((sy1 - fsy1) * (fsx2 - sx2) * scale);

            if ((sy2 < fsy2) && (sx2 < fsx2))
                out = out + src(sy2, sx2) * ((fsy2 - sy2) * (fsx2 - sx2) * scale);

            if ((sy2 < fsy2) && (sx1 > fsx1))
                out = out + src(sy2, (sx1 - 1)) * ((fsy2 - sy2) * (sx1 - fsx1) * scale);

            return saturate_cast<elem_type>(out);
        }

        const Ptr2D src;
        float scale_x, scale_y;
        int width, height;
    };
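
AreaFilter generalizes the box filter to fractional scale factors: fully covered interior pixels get weight scale, the partially covered row/column at each edge contributes in proportion to its coverage (sx1 - fsx1, fsx2 - sx2, and likewise in y), and the four corner pixels are weighted by the product of both fractions. Worked example for one axis, scale_x = 2.5 and destination x = 1: fsx1 = 2.5 and fsx2 = 5.0, so sx1 = 3 and sx2 = 5; source pixels 3 and 4 are fully covered, and pixel 2 contributes with fractional weight sx1 - fsx1 = 0.5. A per-axis sketch of these coverage weights (plain C++, not part of the header):

// Editor's sketch: per-axis coverage weights used by AreaFilter.
#include <cstdio>
#include <cmath>

int main()
{
    float scale_x = 2.5f;
    int x = 1;                               // destination column
    float fsx1 = x * scale_x;                // 2.5
    float fsx2 = fsx1 + scale_x;             // 5.0
    int sx1 = (int)std::ceil(fsx1);          // 3: first fully covered pixel
    int sx2 = (int)std::floor(fsx2);         // 5: one past the last fully covered pixel

    printf("full pixels [%d, %d), left fraction %.2f, right fraction %.2f\n",
           sx1, sx2, sx1 - fsx1, fsx2 - sx2); // pixel 2 counts at weight 0.50
    return 0;
}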
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_FILTERS_HPP__
@@ -1,72 +1,72 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/


#ifndef __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
#define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_

#include <cstdio>

namespace cv { namespace gpu { namespace device
{
    template<class Func>
    void printFuncAttrib(Func& func)
    {
        cudaFuncAttributes attrs;
        cudaFuncGetAttributes(&attrs, func);

        printf("=== Function stats ===\n");
        printf("Name: \n");
        printf("sharedSizeBytes = %d\n", (int)attrs.sharedSizeBytes); // size_t field, cast to match %d
        printf("constSizeBytes = %d\n", (int)attrs.constSizeBytes);   // size_t field, cast to match %d
        printf("localSizeBytes = %d\n", (int)attrs.localSizeBytes);   // size_t field, cast to match %d
        printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
        printf("numRegs = %d\n", attrs.numRegs);
        printf("ptxVersion = %d\n", attrs.ptxVersion);
        printf("binaryVersion = %d\n", attrs.binaryVersion);
        printf("\n");
        fflush(stdout);
    }
}}} // namespace cv { namespace gpu { namespace device

#endif /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
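
A typical use of printFuncAttrib is to dump a kernel's register, shared-memory, and occupancy limits while tuning launch configurations. A minimal sketch (the kernel name is hypothetical, not part of the header):

// Editor's sketch: query attributes of a trivial kernel.
__global__ void dummyKernel(float* data)
{
    data[threadIdx.x] *= 2.f;
}

void report()
{
    cv::gpu::device::printFuncAttrib(dummyKernel); // prints regs, smem, max block size, ...
}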
File diff suppressed because it is too large
Load Diff
@@ -1,235 +1,235 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__

#include <limits>
#include "common.hpp"

namespace cv { namespace gpu { namespace device
{
    template<class T> struct numeric_limits
    {
        typedef T type;
        __device__ __forceinline__ static type min() { return type(); };
        __device__ __forceinline__ static type max() { return type(); };
        __device__ __forceinline__ static type epsilon() { return type(); }
        __device__ __forceinline__ static type round_error() { return type(); }
        __device__ __forceinline__ static type denorm_min() { return type(); }
        __device__ __forceinline__ static type infinity() { return type(); }
        __device__ __forceinline__ static type quiet_NaN() { return type(); }
        __device__ __forceinline__ static type signaling_NaN() { return T(); }
        static const bool is_signed;
    };

    template<> struct numeric_limits<bool>
    {
        typedef bool type;
        __device__ __forceinline__ static type min() { return false; };
        __device__ __forceinline__ static type max() { return true; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = false;
    };

    template<> struct numeric_limits<char>
    {
        typedef char type;
        __device__ __forceinline__ static type min() { return CHAR_MIN; };
        __device__ __forceinline__ static type max() { return CHAR_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = (char)-1 == -1;
    };

    template<> struct numeric_limits<signed char>
    {
        typedef signed char type; // was 'typedef char type', which mismatches the specialization
        __device__ __forceinline__ static type min() { return SCHAR_MIN; };
        __device__ __forceinline__ static type max() { return SCHAR_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = (signed char)-1 == -1;
    };

    template<> struct numeric_limits<unsigned char>
    {
        typedef unsigned char type;
        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return UCHAR_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = false;
    };

    template<> struct numeric_limits<short>
    {
        typedef short type;
        __device__ __forceinline__ static type min() { return SHRT_MIN; };
        __device__ __forceinline__ static type max() { return SHRT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
    };

    template<> struct numeric_limits<unsigned short>
    {
        typedef unsigned short type;
        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return USHRT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = false;
    };

    template<> struct numeric_limits<int>
    {
        typedef int type;
        __device__ __forceinline__ static type min() { return INT_MIN; };
        __device__ __forceinline__ static type max() { return INT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
    };

    template<> struct numeric_limits<unsigned int>
    {
        typedef unsigned int type;
        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return UINT_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = false;
    };

    template<> struct numeric_limits<long>
    {
        typedef long type;
        __device__ __forceinline__ static type min() { return LONG_MIN; };
        __device__ __forceinline__ static type max() { return LONG_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
    };

    template<> struct numeric_limits<unsigned long>
    {
        typedef unsigned long type;
        __device__ __forceinline__ static type min() { return 0; };
        __device__ __forceinline__ static type max() { return ULONG_MAX; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = false;
    };

    template<> struct numeric_limits<float>
    {
        typedef float type;
        __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
        __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
        __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
    };

    template<> struct numeric_limits<double>
    {
        typedef double type;
        __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
        __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
        __device__ __forceinline__ static type epsilon();
        __device__ __forceinline__ static type round_error();
        __device__ __forceinline__ static type denorm_min();
        __device__ __forceinline__ static type infinity();
        __device__ __forceinline__ static type quiet_NaN();
        __device__ __forceinline__ static type signaling_NaN();
        static const bool is_signed = true;
    };
}}} // namespace cv { namespace gpu { namespace device {

#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
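
These device-side numeric_limits mirror std::numeric_limits, which is not usable in __device__ code with the CUDA toolchains this module targets; note that only min(), max(), and (for float) epsilon() are actually defined, the rest are declared but left without bodies. A minimal sketch of the usual use, seeding a minimum search with the largest representable value (kernel is hypothetical; launch with <<<1,1>>>):

// Editor's sketch: numeric_limits<float>::max() as the sentinel of a min search.
using namespace cv::gpu::device;

__global__ void minKernel(const float* in, int n, float* out)
{
    float best = numeric_limits<float>::max(); // start from +FLT_MAX
    for (int i = 0; i < n; ++i)
        best = fminf(best, in[i]);
    *out = best;
}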
@@ -1,216 +1,216 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_SATURATE_CAST_HPP__
#define __OPENCV_GPU_SATURATE_CAST_HPP__

#include "common.hpp"

namespace cv { namespace gpu { namespace device
{
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }

    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
    {
        return (uchar) ::max((int)v, 0);
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
    {
        return (uchar) ::min((uint)v, (uint)UCHAR_MAX);
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
    {
        return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
    }
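
The expression (uint)v <= UCHAR_MAX folds the two-sided range check 0 <= v && v <= UCHAR_MAX into a single unsigned comparison: a negative v wraps around to a huge unsigned value and fails the test. A host-side demonstration (plain C++, not part of the header):

// Editor's sketch: one unsigned compare replaces two signed compares.
#include <cstdio>
#include <climits>

static unsigned char clamp_u8(int v)
{
    return (unsigned char)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
}

int main()
{
    printf("%d %d %d\n", clamp_u8(-5), clamp_u8(128), clamp_u8(300)); // 0 128 255
    return 0;
}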
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
    {
        return (uchar) ::min(v, (uint)UCHAR_MAX);
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
    {
        return saturate_cast<uchar>((int)v); // was (uint)v, which mapped negative shorts to 255 instead of 0
    }

    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
    {
        int iv = __float2int_rn(v);
        return saturate_cast<uchar>(iv);
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v);
        return saturate_cast<uchar>(iv);
    #else
        return saturate_cast<uchar>((float)v);
    #endif
    }

    template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
    {
        return (schar) ::min((int)v, SCHAR_MAX);
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
    {
        return (schar) ::min((uint)v, (uint)SCHAR_MAX);
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
    {
        return (schar)((uint)(v - SCHAR_MIN) <= (uint)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
    {
        return saturate_cast<schar>((int)v);
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
    {
        return (schar) ::min(v, (uint)SCHAR_MAX);
    }

    template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
    {
        int iv = __float2int_rn(v);
        return saturate_cast<schar>(iv);
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v);
        return saturate_cast<schar>(iv);
    #else
        return saturate_cast<schar>((float)v);
    #endif
    }

    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
    {
        return (ushort) ::max((int)v, 0);
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
    {
        return (ushort) ::max((int)v, 0);
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
    {
        return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0);
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
    {
        return (ushort) ::min(v, (uint)USHRT_MAX);
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
    {
        int iv = __float2int_rn(v);
        return saturate_cast<ushort>(iv);
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v);
        return saturate_cast<ushort>(iv);
    #else
        return saturate_cast<ushort>((float)v);
    #endif
    }

    template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
    {
        return (short) ::min((int)v, SHRT_MAX);
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(int v)
    {
        return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN);
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
    {
        return (short) ::min(v, (uint)SHRT_MAX);
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(float v)
    {
        int iv = __float2int_rn(v);
        return saturate_cast<short>(iv);
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v);
        return saturate_cast<short>(iv);
    #else
        return saturate_cast<short>((float)v);
    #endif
    }

    template<> __device__ __forceinline__ int saturate_cast<int>(float v)
    {
        return __float2int_rn(v);
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2int_rn(v);
    #else
        return saturate_cast<int>((float)v);
    #endif
    }

    template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
    {
        return __float2uint_rn(v);
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2uint_rn(v);
    #else
        return saturate_cast<uint>((float)v);
    #endif
    }
}}}

#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
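
Note that the float-to-integer specializations round to nearest via __float2int_rn rather than truncating, and that double sources fall back to a float path on devices below compute capability 1.3, which lack native double support. A host-side sketch of the observable behaviour (plain C++; std::lrint stands in for __float2int_rn):

// Editor's sketch: saturate_cast<uchar>-style conversion on the host.
#include <cstdio>
#include <cmath>
#include <climits>

static unsigned char saturate_u8(float v)
{
    long iv = std::lrint(v); // round to nearest, like __float2int_rn
    return (unsigned char)(iv < 0 ? 0 : iv > UCHAR_MAX ? UCHAR_MAX : iv);
}

int main()
{
    printf("%d %d %d\n", saturate_u8(-3.2f), saturate_u8(127.5f), saturate_u8(300.7f)); // 0 128 255
    return 0;
}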
@@ -1,67 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__
#define __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__

#if defined(__CUDACC__)
    #define __OPENCV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
    #define __OPENCV_GPU_HOST_DEVICE__
#endif

namespace cv { namespace gpu
{
    namespace device
    {
        template<bool expr> struct Static {};

        template<> struct Static<true>
        {
            __OPENCV_GPU_HOST_DEVICE__ static void check() {};
        };
    }
}}

#undef __OPENCV_GPU_HOST_DEVICE__

#endif /* __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__ */
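
A hedged sketch of how this trait works as a pre-C++11 static assertion; the template and bound below are hypothetical, not from OpenCV. Static<false> has no check() member, so a violated condition fails at compile time.

#include "opencv2/gpu/device/static_check.hpp"

template <int N>
__host__ __device__ void fixed_buffer_demo()
{
    // Compiles only when 0 < N <= 256.
    cv::gpu::device::Static<(N > 0) && (N <= 256)>::check();
    float buf[N];
    (void)buf;
}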
@@ -1,67 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__

#include "common.hpp"
#include "utility.hpp"
#include "detail/transform_detail.hpp"

namespace cv { namespace gpu { namespace device
{
    template <typename T, typename D, typename UnOp, typename Mask>
    static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<UnOp> ft;
        transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
    }

    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
    {
        typedef TransformFunctorTraits<BinOp> ft;
        transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
    }
}}}

#endif // __OPENCV_GPU_TRANSFORM_HPP__
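
A minimal sketch of the unary overload in use, assuming a .cu file and the default TransformFunctorTraits; the functor and caller names are hypothetical. WithOutMask (from utility.hpp) selects the unmasked path.

#include "opencv2/gpu/device/transform.hpp"

namespace cv { namespace gpu { namespace device
{
    // Hypothetical per-pixel functor: scale a float image into uchar range.
    struct scale_to_u8
    {
        __device__ __forceinline__ uchar operator()(float x) const
        {
            return saturate_cast<uchar>(x * 255.0f);
        }
    };

    // Host-side wrapper: dispatches one kernel launch over every pixel.
    void scale_caller(PtrStepSz<float> src, PtrStepSz<uchar> dst, cudaStream_t stream)
    {
        transform(src, dst, scale_to_u8(), WithOutMask(), stream);
    }
}}}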
@@ -1,82 +1,82 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__
#define __OPENCV_GPU_TYPE_TRAITS_HPP__

#include "detail/type_traits_detail.hpp"

namespace cv { namespace gpu { namespace device
{
    template <typename T> struct IsSimpleParameter
    {
        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
            type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
    };

    template <typename T> struct TypeTraits
    {
        typedef typename type_traits_detail::UnConst<T>::type NonConstType;
        typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
        typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
        typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
        typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;

        enum { isConst = type_traits_detail::UnConst<T>::value };
        enum { isVolatile = type_traits_detail::UnVolatile<T>::value };

        enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
        enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };

        enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
        enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
        enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
        enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
        enum { isArith = isIntegral || isFloat };
        enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };

        typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
            T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
    };
}}}

#endif // __OPENCV_GPU_TYPE_TRAITS_HPP__
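
A hedged compile-time sanity check built from these traits together with Static<> from static_check.hpp; the function is illustrative, and the assumption is that each named trait evaluates as its name suggests.

#include "opencv2/gpu/device/static_check.hpp"
#include "opencv2/gpu/device/type_traits.hpp"

void type_traits_demo()
{
    using namespace cv::gpu::device;

    Static<TypeTraits<const int>::isConst>::check();
    Static<TypeTraits<float>::isFloat>::check();
    Static<TypeTraits<unsigned short>::isUnsignedInt>::check();

    // UnqualifiedType strips const/volatile: const double -> double.
    typedef TypeTraits<const double>::UnqualifiedType plain;
    Static<TypeTraits<plain>::isArith>::check();
}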
@@ -1,237 +1,237 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_UTILITY_HPP__
#define __OPENCV_GPU_UTILITY_HPP__

#include "saturate_cast.hpp"
#include "datamov_utils.hpp"
#include "detail/reduction_detail.hpp"

namespace cv { namespace gpu { namespace device
{
    #define OPENCV_GPU_LOG_WARP_SIZE (5)
    #define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
    #define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
    #define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)

    ///////////////////////////////////////////////////////////////////////////////
    // swap

    template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
    {
        const T temp = a;
        a = b;
        b = temp;
    }

    ///////////////////////////////////////////////////////////////////////////////
    // Mask Reader

    struct SingleMask
    {
        explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
        __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}

        __device__ __forceinline__ bool operator()(int y, int x) const
        {
            return mask.ptr(y)[x] != 0;
        }

        PtrStepb mask;
    };

    struct SingleMaskChannels
    {
        __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
            : mask(mask_), channels(channels_) {}
        __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
            :mask(mask_.mask), channels(mask_.channels){}

        __device__ __forceinline__ bool operator()(int y, int x) const
        {
            return mask.ptr(y)[x / channels] != 0;
        }

        PtrStepb mask;
        int channels;
    };

    struct MaskCollection
    {
        explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
            : maskCollection(maskCollection_) {}

        __device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
            : maskCollection(masks_.maskCollection), curMask(masks_.curMask){}

        __device__ __forceinline__ void next()
        {
            curMask = *maskCollection++;
        }
        __device__ __forceinline__ void setMask(int z)
        {
            curMask = maskCollection[z];
        }

        __device__ __forceinline__ bool operator()(int y, int x) const
        {
            uchar val;
            return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
        }

        const PtrStepb* maskCollection;
        PtrStepb curMask;
    };

    struct WithOutMask
    {
        __device__ __forceinline__ WithOutMask(){}
        __device__ __forceinline__ WithOutMask(const WithOutMask& mask){}

        __device__ __forceinline__ void next() const
        {
        }
        __device__ __forceinline__ void setMask(int) const
        {
        }

        __device__ __forceinline__ bool operator()(int, int) const
        {
            return true;
        }

        __device__ __forceinline__ bool operator()(int, int, int) const
        {
            return true;
        }

        static __device__ __forceinline__ bool check(int, int)
        {
            return true;
        }

        static __device__ __forceinline__ bool check(int, int, int, uint offset = 0)
        {
            return true;
        }
    };

    ///////////////////////////////////////////////////////////////////////////////
    // Reduction

    template <int n, typename T, typename Op> __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
    {
        StaticAssert<n >= 8 && n <= 512>::check();
        utility_detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
    }

    template <int n, typename T, typename V, typename Pred>
    __device__ __forceinline__ void reducePredVal(volatile T* sdata, T& myData, V* sval, V& myVal, int tid, const Pred& pred)
    {
        StaticAssert<n >= 8 && n <= 512>::check();
        utility_detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
    }

    template <int n, typename T, typename V1, typename V2, typename Pred>
    __device__ __forceinline__ void reducePredVal2(volatile T* sdata, T& myData, V1* sval1, V1& myVal1, V2* sval2, V2& myVal2, int tid, const Pred& pred)
    {
        StaticAssert<n >= 8 && n <= 512>::check();
        utility_detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
    }
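
    // Illustrative usage (added comment, not in the original header): a
    // typical reduce<n>() call inside a kernel, assuming a 256-thread block
    // and plus<> from functional.hpp. Each thread passes its partial result;
    // the block combines them through shared memory:
    //
    //     __shared__ volatile float smem[256];
    //     float partial = ...;  // this thread's partial result
    //     reduce<256>(smem, partial, threadIdx.x, plus<volatile float>());
    //     // thread 0 can then read the block-wide sum from smem[0]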

    ///////////////////////////////////////////////////////////////////////////////
    // Solve linear system

    // solve 2x2 linear system Ax=b
    template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
    {
        T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];

        if (det != 0)
        {
            double invdet = 1.0 / det;

            x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));

            x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));

            return true;
        }

        return false;
    }

    // solve 3x3 linear system Ax=b
    template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
    {
        T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
              - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
              + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);

        if (det != 0)
        {
            double invdet = 1.0 / det;

            x[0] = saturate_cast<T>(invdet *
                (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
                 A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) +
                 A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   )));

            x[1] = saturate_cast<T>(invdet *
                (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) -
                 b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
                 A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0])));

            x[2] = saturate_cast<T>(invdet *
                (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
                 A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
                 b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));

            return true;
        }

        return false;
    }
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_UTILITY_HPP__
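
For reference, a minimal sketch (hypothetical kernel, assumed include path) of solve2x2, which applies Cramer's rule and reports a singular matrix by returning false:

#include "opencv2/gpu/device/utility.hpp"

__global__ void solve2x2_demo(float* out)
{
    // A = [3 1; 1 2], b = (9, 8): det = 3*2 - 1*1 = 5, so x = (2, 3).
    float A[2][2] = { {3.0f, 1.0f}, {1.0f, 2.0f} };
    float b[2] = { 9.0f, 8.0f };
    float x[2];

    if (cv::gpu::device::solve2x2(A, b, x))
    {
        out[0] = x[0]; // 2
        out[1] = x[1]; // 3
    }
}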
@@ -1,224 +1,224 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_HPP__

#include "utility.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"

namespace cv { namespace gpu { namespace device
{
    template <typename T> struct L1Dist
    {
        typedef int value_type;
        typedef int result_type;

        __device__ __forceinline__ L1Dist() : mySum(0) {}

        __device__ __forceinline__ void reduceIter(int val1, int val2)
        {
            mySum = __sad(val1, val2, mySum);
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
        }

        __device__ __forceinline__ operator int() const
        {
            return mySum;
        }

        int mySum;
    };
    template <> struct L1Dist<float>
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ L1Dist() : mySum(0.0f) {}

        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            mySum += ::fabs(val1 - val2);
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
        }

        __device__ __forceinline__ operator float() const
        {
            return mySum;
        }

        float mySum;
    };

    struct L2Dist
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ L2Dist() : mySum(0.0f) {}

        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            float reg = val1 - val2;
            mySum += reg * reg;
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
        }

        __device__ __forceinline__ operator float() const
        {
            return sqrtf(mySum);
        }

        float mySum;
    };

    struct HammingDist
    {
        typedef int value_type;
        typedef int result_type;

        __device__ __forceinline__ HammingDist() : mySum(0) {}

        __device__ __forceinline__ void reduceIter(int val1, int val2)
        {
            mySum += __popc(val1 ^ val2);
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
        }

        __device__ __forceinline__ operator int() const
        {
            return mySum;
        }

        int mySum;
    };

    // calc distance between two vectors in global memory
    template <int THREAD_DIM, typename Dist, typename T1, typename T2>
    __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        for (int i = tid; i < len; i += THREAD_DIM)
        {
            T1 val1;
            ForceGlob<T1>::Load(vec1, i, val1);

            T2 val2;
            ForceGlob<T2>::Load(vec2, i, val2);

            dist.reduceIter(val1, val2);
        }

        dist.reduceAll<THREAD_DIM>(smem, tid);
    }

    // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);

        dist.reduceAll<THREAD_DIM>(smem, tid);
    }

    // calc distance between two vectors in global memory
    template <int THREAD_DIM, typename T1> struct VecDiffGlobal
    {
        explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        {
            vec1 = vec1_;
        }

        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
        }

        const T1* vec1;
    };

    // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
    {
        template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
        {
            if (glob_tid < len)
                smem[glob_tid] = vec1[glob_tid];
            __syncthreads();

            U* vec1ValsPtr = vec1Vals;

            #pragma unroll
            for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
                *vec1ValsPtr++ = smem[i];

            __syncthreads();
        }

        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
        }

        U vec1Vals[MAX_LEN / THREAD_DIM];
    };
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
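
A hedged sketch of the global-memory path: one 64-thread block accumulates an L2 distance between two float vectors. The kernel, block size, and shared-memory size are assumptions, not part of the header.

#include "opencv2/gpu/device/vec_distance.hpp"

__global__ void l2_distance_demo(const float* a, const float* b, int len, float* out)
{
    __shared__ float smem[64];

    cv::gpu::device::L2Dist dist; // accumulates the sum of squared differences
    cv::gpu::device::calcVecDiffGlobal<64>(a, b, len, dist, smem, threadIdx.x);

    if (threadIdx.x == 0)
        *out = (float)dist; // operator float() applies the final sqrtf
}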
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_HPP__

#include "utility.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"

namespace cv { namespace gpu { namespace device
{
    template <typename T> struct L1Dist
    {
        typedef int value_type;
        typedef int result_type;

        __device__ __forceinline__ L1Dist() : mySum(0) {}

        __device__ __forceinline__ void reduceIter(int val1, int val2)
        {
            mySum = __sad(val1, val2, mySum);
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
        }

        __device__ __forceinline__ operator int() const
        {
            return mySum;
        }

        int mySum;
    };
    template <> struct L1Dist<float>
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ L1Dist() : mySum(0.0f) {}

        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            mySum += ::fabs(val1 - val2);
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
        }

        __device__ __forceinline__ operator float() const
        {
            return mySum;
        }

        float mySum;
    };

    struct L2Dist
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ L2Dist() : mySum(0.0f) {}

        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            float reg = val1 - val2;
            mySum += reg * reg;
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
        }

        __device__ __forceinline__ operator float() const
        {
            return sqrtf(mySum);
        }

        float mySum;
    };

    struct HammingDist
    {
        typedef int value_type;
        typedef int result_type;

        __device__ __forceinline__ HammingDist() : mySum(0) {}

        __device__ __forceinline__ void reduceIter(int val1, int val2)
        {
            mySum += __popc(val1 ^ val2);
        }

        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
        }

        __device__ __forceinline__ operator int() const
        {
            return mySum;
        }

        int mySum;
    };
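
    // Illustrative sketch (not part of the original header): the functors above all
    // share one protocol - value_type/result_type, reduceIter, reduceAll and a
    // conversion to the result - so a user-defined metric needs only those members
    // to plug into calcVecDiffGlobal/calcVecDiffCached below.  LinfDist is a
    // hypothetical example, assuming the maximum functor from functional.hpp
    // composes with reduce the same way the plus functor does above.
    struct LinfDist
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ LinfDist() : myMax(0.0f) {}

        // accumulate one element pair
        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            myMax = ::fmaxf(myMax, ::fabsf(val1 - val2));
        }

        // combine per-thread partial results across the block
        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, myMax, tid, maximum<volatile float>());
        }

        __device__ __forceinline__ operator float() const
        {
            return myMax;
        }

        float myMax;
    };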

    // calc distance between two vectors in global memory
    template <int THREAD_DIM, typename Dist, typename T1, typename T2>
    __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        for (int i = tid; i < len; i += THREAD_DIM)
        {
            T1 val1;
            ForceGlob<T1>::Load(vec1, i, val1);

            T2 val2;
            ForceGlob<T2>::Load(vec2, i, val2);

            dist.reduceIter(val1, val2);
        }

        dist.reduceAll<THREAD_DIM>(smem, tid);
    }

    // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);

        dist.reduceAll<THREAD_DIM>(smem, tid);
    }

    // calc distance between two vectors in global memory
    template <int THREAD_DIM, typename T1> struct VecDiffGlobal
    {
        explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        {
            vec1 = vec1_;
        }

        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
        }

        const T1* vec1;
    };

    // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
    {
        template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
        {
            if (glob_tid < len)
                smem[glob_tid] = vec1[glob_tid];
            __syncthreads();

            U* vec1ValsPtr = vec1Vals;

            #pragma unroll
            for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
                *vec1ValsPtr++ = smem[i];

            __syncthreads();
        }

        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
        }

        U vec1Vals[MAX_LEN / THREAD_DIM];
    };
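
    // Usage sketch (not part of the original header): one block of THREAD_DIM threads
    // compares a query descriptor against a train descriptor.  The kernel and buffer
    // names are hypothetical; only the types above are taken from this header.
    template <int THREAD_DIM>
    __global__ void matchL2(const float* query, const float* train, int len, float* result)
    {
        __shared__ float smem[THREAD_DIM];

        L2Dist dist;
        VecDiffGlobal<THREAD_DIM, float> vecDiff(query);
        vecDiff.calc(train, len, dist, smem, threadIdx.x); // strided reduceIter over len, then reduceAll

        if (threadIdx.x == 0)
            *result = dist; // operator float() applies the final sqrtf
    }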
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__

@@ -1,330 +1,330 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__

#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "functional.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace vec_math_detail
    {
        template <int cn, typename VecD> struct SatCastHelper;
        template <typename VecD> struct SatCastHelper<1, VecD>
        {
            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x));
            }
        };
        template <typename VecD> struct SatCastHelper<2, VecD>
        {
            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
            }
        };
        template <typename VecD> struct SatCastHelper<3, VecD>
        {
            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
            }
        };
        template <typename VecD> struct SatCastHelper<4, VecD>
        {
            template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
            }
        };

        template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v)
        {
            return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
        }
    }

    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}

    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
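
    // Usage sketch (not part of the original header): the overloads above extend the
    // scalar saturate_cast to CUDA vector types channel by channel.  packPixel is a
    // hypothetical helper clamping a float pixel into 8 bits per channel.
    __device__ __forceinline__ uchar4 packPixel(const float4& f)
    {
        return saturate_cast<uchar4>(f); // each channel clamped to [0, 255] and rounded
    }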

#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
    }

    namespace vec_math_detail
    {
        template <typename T1, typename T2> struct BinOpTraits
        {
            typedef int argument_type;
        };
        template <typename T> struct BinOpTraits<T, T>
        {
            typedef T argument_type;
        };
        template <typename T> struct BinOpTraits<T, double>
        {
            typedef double argument_type;
        };
        template <typename T> struct BinOpTraits<double, T>
        {
            typedef double argument_type;
        };
        template <> struct BinOpTraits<double, double>
        {
            typedef double argument_type;
        };
        template <typename T> struct BinOpTraits<T, float>
        {
            typedef float argument_type;
        };
        template <typename T> struct BinOpTraits<float, T>
        {
            typedef float argument_type;
        };
        template <> struct BinOpTraits<float, float>
        {
            typedef float argument_type;
        };
        template <> struct BinOpTraits<double, float>
        {
            typedef double argument_type;
        };
        template <> struct BinOpTraits<float, double>
        {
            typedef double argument_type;
        };
    }

#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
    } \
    __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
    { \
        func<type> f; \
        return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
    } \
    template <typename T> \
    __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
    { \
        func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
        return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
    }

#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, fabs, fabs_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)

#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
    OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
    OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
    OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)

    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
    OPENCV_GPU_IMPLEMENT_VEC_OP(float)
    OPENCV_GPU_IMPLEMENT_VEC_OP(double)

#undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
#undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
#undef OPENCV_GPU_IMPLEMENT_VEC_OP
#undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
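
    // Usage sketch (not part of the original header): after the expansions above the
    // CUDA vector types support ordinary arithmetic.  blend is a hypothetical helper;
    // the vector-scalar overloads promote through vec_math_detail::BinOpTraits.
    __device__ __forceinline__ float3 blend(const float3& a, const float3& b, float alpha)
    {
        return a * alpha + b * (1.0f - alpha); // operators come from OPENCV_GPU_IMPLEMENT_VEC_OP(float)
    }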
}}} // namespace cv { namespace gpu { namespace device

#endif // __OPENCV_GPU_VECMATH_HPP__

@@ -1,280 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__
#define __OPENCV_GPU_VEC_TRAITS_HPP__

#include "common.hpp"

namespace cv { namespace gpu { namespace device
{
    template<typename T, int N> struct TypeVec;

    struct __align__(8) uchar8
    {
        uchar a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
    {
        uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct __align__(8) char8
    {
        schar a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
    {
        char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct __align__(16) ushort8
    {
        ushort a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
    {
        ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct __align__(16) short8
    {
        short a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
    {
        short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct __align__(32) uint8
    {
        uint a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
    {
        uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct __align__(32) int8
    {
        int a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
    {
        int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct __align__(32) float8
    {
        float a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
    {
        float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }
    struct double8
    {
        double a0, a1, a2, a3, a4, a5, a6, a7;
    };
    static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
    {
        double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
        return val;
    }

#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
    template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
    template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
    template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
    template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
    template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
    template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
    template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
    template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
    template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
    template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };

    OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)

#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
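
    // Usage sketch (not part of the original header): TypeVec maps an element type and
    // a channel count to the matching CUDA vector type, so templated kernels can derive
    // their pixel type.  zeroPixel is a hypothetical illustration.
    template <typename T, int cn>
    __device__ __forceinline__ typename TypeVec<T, cn>::vec_type zeroPixel()
    {
        // e.g. TypeVec<float, 3>::vec_type is float3; TypeVec<uchar, 4>::vec_type is uchar4
        return typename TypeVec<T, cn>::vec_type();
    }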

    template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
    template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
    template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
    template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
    template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };

    template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
    template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
    template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
    template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
    template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };

    template<typename T> struct VecTraits;

#define OPENCV_GPU_IMPLEMENT_VEC_TRAITS(type) \
    template<> struct VecTraits<type> \
    { \
        typedef type elem_type; \
        enum {cn=1}; \
        static __device__ __host__ __forceinline__ type all(type v) {return v;} \
        static __device__ __host__ __forceinline__ type make(type x) {return x;} \
        static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
    }; \
    template<> struct VecTraits<type ## 1> \
    { \
        typedef type elem_type; \
        enum {cn=1}; \
        static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
        static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
        static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
    }; \
    template<> struct VecTraits<type ## 2> \
    { \
        typedef type elem_type; \
        enum {cn=2}; \
        static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
        static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
        static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
    }; \
    template<> struct VecTraits<type ## 3> \
    { \
        typedef type elem_type; \
        enum {cn=3}; \
        static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
        static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
    }; \
    template<> struct VecTraits<type ## 4> \
    { \
        typedef type elem_type; \
        enum {cn=4}; \
        static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
        static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
    }; \
    template<> struct VecTraits<type ## 8> \
    { \
        typedef type elem_type; \
        enum {cn=8}; \
        static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
        static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
    };

    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)

#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
|
||||
|
||||
template<> struct VecTraits<char>
|
||||
{
|
||||
typedef char elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char all(char v) {return v;}
|
||||
static __device__ __host__ __forceinline__ char make(char x) {return x;}
|
||||
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
|
||||
};
|
||||
template<> struct VecTraits<schar>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
|
||||
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
|
||||
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
|
||||
};
|
||||
template<> struct VecTraits<char1>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
|
||||
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
|
||||
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
|
||||
};
|
||||
template<> struct VecTraits<char2>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=2};
|
||||
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
|
||||
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
|
||||
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
|
||||
};
|
||||
template<> struct VecTraits<char3>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=3};
|
||||
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
|
||||
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
|
||||
};
|
||||
template<> struct VecTraits<char4>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=4};
|
||||
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
|
||||
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
|
||||
};
|
||||
template<> struct VecTraits<char8>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
enum {cn=8};
|
||||
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
|
||||
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
|
||||
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
|
||||
};
|
||||
}}} // namespace cv { namespace gpu { namespace device
|
||||
|
||||
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
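For orientation while reading the diff: TypeVec maps an element type plus a channel count to the corresponding CUDA vector type at compile time (TypeVec<float, 3>::vec_type is float3), and VecTraits gives generic construction over those types. A minimal usage sketch follows; the kernel, its name, and the launch shape are illustrative assumptions, not part of this commit:

#include "opencv2/gpu/device/vec_traits.hpp"

using namespace cv::gpu::device;

// Broadcast a scalar across all channels of any supported vector type.
// VecTraits<T>::all() expands to make_uchar3(v, v, v), make_float4(v, v, v, v), etc.
template <typename T>
__global__ void setToScalar(T* data, int n, typename VecTraits<T>::elem_type value)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] = VecTraits<T>::all(value);
}

// Example instantiation (hypothetical host code):
//     setToScalar<uchar3><<<blocks, 256>>>(d_ptr, n, 0);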
@@ -1,112 +1,112 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
#define __OPENCV_GPU_DEVICE_WARP_HPP__

namespace cv { namespace gpu { namespace device
{
    struct Warp
    {
        enum
        {
            LOG_WARP_SIZE = 5,
            WARP_SIZE = 1 << LOG_WARP_SIZE,
            STRIDE = WARP_SIZE
        };

        /** \brief Returns the warp lane ID of the calling thread. */
        static __device__ __forceinline__ unsigned int laneId()
        {
            unsigned int ret;
            asm("mov.u32 %0, %laneid;" : "=r"(ret) );
            return ret;
        }

        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            for(It t = beg + laneId(); t < end; t += STRIDE)
                *t = value;
        }

        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
        {
            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
                *out = *t;
            return out;
        }

        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
        {
            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
                *out = op(*t);
            return out;
        }

        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            unsigned int lane = laneId();

            InIt1 t1 = beg1 + lane;
            InIt2 t2 = beg2 + lane;
            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
                *out = op(*t1, *t2);
            return out;
        }

        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            unsigned int lane = laneId();
            value += lane;

            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }
    };
}}} // namespace cv { namespace gpu { namespace device

#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
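A sketch of how these warp-level helpers are used: every lane of a warp makes the same call, laneId() staggers the starting element, and the STRIDE loop lets the 32 lanes cover the range cooperatively. The kernel below is an illustrative assumption, not code from this commit; it relies on the one-warp-per-block launch noted in the comment and on the warp-synchronous execution this era of GPU code assumed:

#include "opencv2/gpu/device/warp.hpp"

using namespace cv::gpu::device;

// Launch with blockDim.x == 32 (one warp per block) for this sketch.
__global__ void warpHelpersDemo(int* out)
{
    __shared__ int smem[64];

    Warp::fill(smem, smem + 64, 0);               // all 32 lanes cooperatively zero the slice
    Warp::yota(smem, smem + 64, (int)blockIdx.x); // then write blockIdx.x, blockIdx.x + 1, ...

    // Copy the warp's slice to global memory, again one STRIDE-spaced chunk per lane.
    Warp::copy(smem, smem + 64, out + blockIdx.x * 64);
}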
@@ -1,69 +1,69 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__

namespace cv { namespace gpu { namespace device
{
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31; // index of thread in warp (0..31)

        if (lane < 16)
        {
            T partial = ptr[tid];

            ptr[tid] = partial = partial + ptr[tid + 16];
            ptr[tid] = partial = partial + ptr[tid +  8];
            ptr[tid] = partial = partial + ptr[tid +  4];
            ptr[tid] = partial = partial + ptr[tid +  2];
            ptr[tid] = partial = partial + ptr[tid +  1];
        }

        return ptr[tid - lane];
    }
}}} // namespace cv { namespace gpu { namespace device {

#endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */
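For context: warp_reduce is a warp-synchronous tree sum over 32 consecutive shared-memory elements (the volatile pointer keeps the compiler from caching the intermediate stores), and ptr[tid - lane] hands the warp's total back to every lane. A minimal calling sketch; the kernel, block size, and buffer layout are assumptions for illustration only:

#include "opencv2/gpu/device/warp_reduce.hpp"

using namespace cv::gpu::device;

// Launch with blockDim.x == 32: each block sums 32 inputs into one output.
__global__ void blockSum32(const float* in, float* out)
{
    __shared__ volatile float smem[32];

    const unsigned int tid = threadIdx.x;
    smem[tid] = in[blockIdx.x * 32 + tid];

    // Lanes 0..15 fold the upper half in at strides 16, 8, 4, 2, 1.
    const float total = warp_reduce(smem, tid);

    if (tid == 0)
        out[blockIdx.x] = total;
}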
@@ -1,43 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "opencv2/core/cuda_devptrs.hpp"
File diff suppressed because it is too large
@@ -1,43 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "opencv2/core/gpumat.hpp"
@@ -1,64 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__

#include "opencv2/gpu/gpu.hpp"
#include "cuda_runtime_api.h"

namespace cv
{
    namespace gpu
    {
        // This is the only header file that depends on CUDA; all other headers are independent.
        // So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
        // But if you want to use the GPU yourself, you can get the CUDA stream instance using the class below.
        // In that case you have to install the CUDA Toolkit.
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
        };
    }
}

#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
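A sketch of the intended use: StreamAccessor::getStream exposes the raw cudaStream_t behind a cv::gpu::Stream so user kernels can be queued on the same asynchronous stream as OpenCV's own GPU calls. The kernel and wrapper below are illustrative assumptions, not part of this commit:

#include "opencv2/gpu/gpu.hpp"
#include "opencv2/gpu/stream_accessor.hpp"

// Placeholder user kernel for the sketch: scales a device buffer in place.
__global__ void scaleBy2(float* data, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= 2.f;
}

// Enqueue custom work so it is ordered after whatever the Stream already holds.
void enqueueCustomWork(cv::gpu::Stream& stream, float* d_data, int n)
{
    cudaStream_t raw = cv::gpu::StreamAccessor::getStream(stream);
    scaleBy2<<<(n + 255) / 256, 256, 0, raw>>>(d_data, n);
}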
0
modules/gpu/misc/mark_nvidia.py
Normal file → Executable file
@@ -1,381 +1,381 @@
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBM
|
||||
|
||||
typedef std::tr1::tuple<string, string> pair_string;
|
||||
DEF_PARAM_TEST_1(ImagePair, pair_string);
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
|
||||
{
|
||||
declare.time(5.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int preset = 0;
|
||||
const int ndisp = 256;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
d_bm(d_imgLeft, d_imgRight, d_dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_bm(d_imgLeft, d_imgRight, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::StereoBM bm(preset, ndisp);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
bm(imgLeft, imgRight, dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
bm(imgLeft, imgRight, dst);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBeliefPropagation
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
|
||||
{
|
||||
declare.time(10.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0));
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1));
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int ndisp = 64;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
d_bp(d_imgLeft, d_imgRight, d_dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_bp(d_imgLeft, d_imgRight, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoConstantSpaceBP
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
|
||||
{
|
||||
declare.time(10.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int ndisp = 128;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
d_csbp(d_imgLeft, d_imgRight, d_dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_csbp(d_imgLeft, d_imgRight, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DisparityBilateralFilter
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
|
||||
{
|
||||
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
const cv::Mat disp = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(disp.empty());
|
||||
|
||||
const int ndisp = 128;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
|
||||
|
||||
cv::gpu::GpuMat d_img(img);
|
||||
cv::gpu::GpuMat d_disp(disp);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
d_filter(d_disp, d_img, d_dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_filter(d_disp, d_img, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// TransformPoints
|
||||
|
||||
DEF_PARAM_TEST_1(Count, int);
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
|
||||
{
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat src(1, count, CV_32FC3);
|
||||
fillRandom(src, -100, 100);
|
||||
|
||||
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL() << "No such CPU implementation analogy.";
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
// ProjectPoints

PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
{
    const int count = GetParam();

    cv::Mat src(1, count, CV_32FC3);
    fillRandom(src, -100, 100);

    const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
    const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
    const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);

        TEST_CYCLE()
        {
            cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);

        TEST_CYCLE()
        {
            cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// SolvePnPRansac

PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
{
    declare.time(10.0);

    const int count = GetParam();

    cv::Mat object(1, count, CV_32FC3);
    fillRandom(object, -100, 100);

    cv::Mat camera_mat(3, 3, CV_32FC1);
    fillRandom(camera_mat, 0.5, 1);
    camera_mat.at<float>(0, 1) = 0.f;
    camera_mat.at<float>(1, 0) = 0.f;
    camera_mat.at<float>(2, 0) = 0.f;
    camera_mat.at<float>(2, 1) = 0.f;

    const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));

    std::vector<cv::Point2f> image_vec;
    cv::Mat rvec_gold(1, 3, CV_32FC1);
    fillRandom(rvec_gold, 0, 1);
    cv::Mat tvec_gold(1, 3, CV_32FC1);
    fillRandom(tvec_gold, 0, 1);
    cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);

    cv::Mat image(1, count, CV_32FC2, &image_vec[0]);

    cv::Mat rvec;
    cv::Mat tvec;

    if (PERF_RUN_GPU())
    {
        cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);

        TEST_CYCLE()
        {
            cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
        }
    }
    else
    {
        cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);

        TEST_CYCLE()
        {
            cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
        }
    }

    CPU_SANITY_CHECK(rvec);
    CPU_SANITY_CHECK(tvec);
}
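
// Editor's sketch (assumption, not in the original file): since the test
// synthesizes its image points from a gold pose, the recovered (rvec, tvec)
// can be sanity-checked by reprojecting the object points and measuring the
// mean squared distance to the observed image points.
static double exampleMeanSquaredReprojectionError(const cv::Mat& object,
                                                  const std::vector<cv::Point2f>& observed,
                                                  const cv::Mat& camera_mat, const cv::Mat& dist_coef,
                                                  const cv::Mat& rvec, const cv::Mat& tvec)
{
    std::vector<cv::Point2f> reprojected;
    cv::projectPoints(object, rvec, tvec, camera_mat, dist_coef, reprojected);

    double err = 0.0;
    for (size_t i = 0; i < observed.size(); ++i)
    {
        const cv::Point2f d = observed[i] - reprojected[i]; // per-point pixel offset
        err += static_cast<double>(d.x) * d.x + static_cast<double>(d.y) * d.y;
    }

    return observed.empty() ? 0.0 : err / observed.size();
}
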
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D

PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
    const cv::Size size = GET_PARAM(0);
    const int depth = GET_PARAM(1);

    cv::Mat src(size, depth);
    fillRandom(src, 5.0, 30.0);

    cv::Mat Q(4, 4, CV_32FC1);
    fillRandom(Q, 0.1, 1.0);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);

        TEST_CYCLE()
        {
            cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::reprojectImageTo3D(src, dst, Q);

        TEST_CYCLE()
        {
            cv::reprojectImageTo3D(src, dst, Q);
        }

        CPU_SANITY_CHECK(dst);
    }
}
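
// Editor's note: the Q matrix above is random, which is fine for timing only.
// In a real pipeline Q is the 4x4 disparity-to-depth mapping produced by
// cv::stereoRectify. A hedged sketch of how it is usually obtained; all of
// the calibration inputs here are placeholders:
static cv::Mat exampleDisparityToDepthMatrix(const cv::Mat& K1, const cv::Mat& D1,
                                             const cv::Mat& K2, const cv::Mat& D2,
                                             cv::Size imageSize, const cv::Mat& R, const cv::Mat& T)
{
    cv::Mat R1, R2, P1, P2, Q;
    cv::stereoRectify(K1, D1, K2, D2, imageSize, R, T, R1, R2, P1, P2, Q);
    return Q; // suitable for reprojectImageTo3D
}
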
//////////////////////////////////////////////////////////////////////
// DrawColorDisp

PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
    const cv::Size size = GET_PARAM(0);
    const int type = GET_PARAM(1);

    cv::Mat src(size, type);
    fillRandom(src, 0, 255);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::drawColorDisp(d_src, d_dst, 255);

        TEST_CYCLE()
        {
            cv::gpu::drawColorDisp(d_src, d_dst, 255);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        FAIL() << "No such CPU implementation analogy.";
    }
}

} // namespace
File diff suppressed because it is too large
@@ -1,309 +1,309 @@
#include "perf_precomp.hpp"

using namespace std;
using namespace testing;

namespace {

//////////////////////////////////////////////////////////////////////
// SURF

DEF_PARAM_TEST_1(Image, string);

PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.png"))
{
    declare.time(50.0);

    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    if (PERF_RUN_GPU())
    {
        cv::gpu::SURF_GPU d_surf;

        cv::gpu::GpuMat d_img(img);
        cv::gpu::GpuMat d_keypoints, d_descriptors;

        d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);

        TEST_CYCLE()
        {
            d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
        }

        GPU_SANITY_CHECK(d_descriptors, 1e-4);
        GPU_SANITY_CHECK_KEYPOINTS(SURF, d_keypoints);
    }
    else
    {
        cv::SURF surf;

        std::vector<cv::KeyPoint> keypoints;
        cv::Mat descriptors;

        surf(img, cv::noArray(), keypoints, descriptors);

        TEST_CYCLE()
        {
            keypoints.clear();
            surf(img, cv::noArray(), keypoints, descriptors);
        }

        SANITY_CHECK_KEYPOINTS(keypoints);
        SANITY_CHECK(descriptors, 1e-4);
    }
}
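
// Editor's sketch (assumption): SURF_GPU stores its keypoints in a GpuMat; to
// feed them to CPU-side drawing or matching code they have to be converted
// back into std::vector<cv::KeyPoint>, which SURF_GPU::downloadKeypoints does.
static std::vector<cv::KeyPoint> exampleDownloadSurfKeypoints(cv::gpu::SURF_GPU& surf,
                                                              const cv::gpu::GpuMat& d_keypoints)
{
    std::vector<cv::KeyPoint> keypoints;
    surf.downloadKeypoints(d_keypoints, keypoints); // device layout -> KeyPoint vector
    return keypoints;
}
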
//////////////////////////////////////////////////////////////////////
// FAST

PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.png"))
{
    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    if (PERF_RUN_GPU())
    {
        cv::gpu::FAST_GPU d_fast(20);

        cv::gpu::GpuMat d_img(img);
        cv::gpu::GpuMat d_keypoints;

        d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);

        TEST_CYCLE()
        {
            d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
        }

        GPU_SANITY_CHECK_RESPONSE(FAST, d_keypoints);
    }
    else
    {
        std::vector<cv::KeyPoint> keypoints;

        cv::FAST(img, keypoints, 20);

        TEST_CYCLE()
        {
            keypoints.clear();
            cv::FAST(img, keypoints, 20);
        }

        SANITY_CHECK_KEYPOINTS(keypoints);
    }
}

//////////////////////////////////////////////////////////////////////
// ORB

PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
{
    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    if (PERF_RUN_GPU())
    {
        cv::gpu::ORB_GPU d_orb(4000);

        cv::gpu::GpuMat d_img(img);
        cv::gpu::GpuMat d_keypoints, d_descriptors;

        d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);

        TEST_CYCLE()
        {
            d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
        }

        GPU_SANITY_CHECK_KEYPOINTS(ORB, d_keypoints);
        GPU_SANITY_CHECK(d_descriptors);
    }
    else
    {
        cv::ORB orb(4000);

        std::vector<cv::KeyPoint> keypoints;
        cv::Mat descriptors;

        orb(img, cv::noArray(), keypoints, descriptors);

        TEST_CYCLE()
        {
            keypoints.clear();
            orb(img, cv::noArray(), keypoints, descriptors);
        }

        SANITY_CHECK_KEYPOINTS(keypoints);
        SANITY_CHECK(descriptors);
    }
}

//////////////////////////////////////////////////////////////////////
// BFMatch

DEF_PARAM_TEST(DescSize_Norm, int, NormType);

PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
    declare.time(20.0);

    int desc_size = GET_PARAM(0);
    int normType = GET_PARAM(1);

    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;

    cv::Mat query(3000, desc_size, type);
    fillRandom(query);

    cv::Mat train(3000, desc_size, type);
    fillRandom(train);

    if (PERF_RUN_GPU())
    {
        cv::gpu::BFMatcher_GPU d_matcher(normType);

        cv::gpu::GpuMat d_query(query);
        cv::gpu::GpuMat d_train(train);
        cv::gpu::GpuMat d_trainIdx, d_distance;

        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);

        TEST_CYCLE()
        {
            d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        }

        GPU_SANITY_CHECK(d_trainIdx);
        GPU_SANITY_CHECK(d_distance);
    }
    else
    {
        cv::BFMatcher matcher(normType);

        std::vector<cv::DMatch> matches;

        matcher.match(query, train, matches);

        TEST_CYCLE()
        {
            matcher.match(query, train, matches);
        }

        SANITY_CHECK(matches);
    }
}

//////////////////////////////////////////////////////////////////////
// BFKnnMatch

DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);

PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
    Values(64, 128, 256),
    Values(2, 3),
    Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
    declare.time(30.0);

    int desc_size = GET_PARAM(0);
    int k = GET_PARAM(1);
    int normType = GET_PARAM(2);

    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;

    cv::Mat query(3000, desc_size, type);
    fillRandom(query);

    cv::Mat train(3000, desc_size, type);
    fillRandom(train);

    if (PERF_RUN_GPU())
    {
        cv::gpu::BFMatcher_GPU d_matcher(normType);

        cv::gpu::GpuMat d_query(query);
        cv::gpu::GpuMat d_train(train);
        cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;

        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);

        TEST_CYCLE()
        {
            d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
        }

        GPU_SANITY_CHECK(d_trainIdx);
        GPU_SANITY_CHECK(d_distance);
    }
    else
    {
        cv::BFMatcher matcher(normType);

        std::vector< std::vector<cv::DMatch> > matches;

        matcher.knnMatch(query, train, matches, k);

        TEST_CYCLE()
        {
            matcher.knnMatch(query, train, matches, k);
        }

        SANITY_CHECK(matches);
    }
}
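
// Editor's sketch (assumption, not in the original file): the usual consumer
// of knnMatch with k = 2 is Lowe's ratio test, which keeps a match only when
// the best distance is clearly smaller than the second best.
static std::vector<cv::DMatch> exampleRatioTest(const std::vector< std::vector<cv::DMatch> >& knnMatches,
                                                float ratio = 0.75f)
{
    std::vector<cv::DMatch> good;
    for (size_t i = 0; i < knnMatches.size(); ++i)
    {
        if (knnMatches[i].size() >= 2 &&
            knnMatches[i][0].distance < ratio * knnMatches[i][1].distance)
        {
            good.push_back(knnMatches[i][0]); // unambiguous nearest neighbour
        }
    }
    return good;
}
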
//////////////////////////////////////////////////////////////////////
// BFRadiusMatch

PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
    declare.time(30.0);

    int desc_size = GET_PARAM(0);
    int normType = GET_PARAM(1);

    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;

    cv::Mat query(3000, desc_size, type);
    fillRandom(query, 0.0, 1.0);

    cv::Mat train(3000, desc_size, type);
    fillRandom(train, 0.0, 1.0);

    if (PERF_RUN_GPU())
    {
        cv::gpu::BFMatcher_GPU d_matcher(normType);

        cv::gpu::GpuMat d_query(query);
        cv::gpu::GpuMat d_train(train);
        cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;

        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);

        TEST_CYCLE()
        {
            d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
        }

        GPU_SANITY_CHECK(d_trainIdx);
        GPU_SANITY_CHECK(d_distance);
    }
    else
    {
        cv::BFMatcher matcher(normType);

        std::vector< std::vector<cv::DMatch> > matches;

        matcher.radiusMatch(query, train, matches, 2.0);

        TEST_CYCLE()
        {
            matcher.radiusMatch(query, train, matches, 2.0);
        }

        SANITY_CHECK(matches);
    }
}

} // namespace
@@ -1,415 +1,415 @@
#include "perf_precomp.hpp"

using namespace std;
using namespace testing;

namespace {

//////////////////////////////////////////////////////////////////////
// Blur

DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);

PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);
    int ksize = GET_PARAM(2);

    cv::Mat src(size, type);
    fillRandom(src);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));

        TEST_CYCLE()
        {
            cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::blur(src, dst, cv::Size(ksize, ksize));

        TEST_CYCLE()
        {
            cv::blur(src, dst, cv::Size(ksize, ksize));
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// Sobel

PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);
    int ksize = GET_PARAM(2);

    cv::Mat src(size, type);
    fillRandom(src);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::GpuMat d_buf;

        cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);

        TEST_CYCLE()
        {
            cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::Sobel(src, dst, -1, 1, 1, ksize);

        TEST_CYCLE()
        {
            cv::Sobel(src, dst, -1, 1, 1, ksize);
        }

        CPU_SANITY_CHECK(dst);
    }
}
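
// Editor's sketch (assumption): the test above times a single mixed
// derivative; a more typical use computes the x and y first derivatives
// separately and combines them into a gradient magnitude. Assumes a
// single-channel input so cv::magnitude applies cleanly.
static cv::Mat exampleSobelMagnitude(const cv::Mat& gray, int ksize)
{
    cv::Mat dx, dy, mag;
    cv::Sobel(gray, dx, CV_32F, 1, 0, ksize); // d/dx
    cv::Sobel(gray, dy, CV_32F, 0, 1, ksize); // d/dy
    cv::magnitude(dx, dy, mag);               // sqrt(dx^2 + dy^2) per pixel
    return mag;
}
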
//////////////////////////////////////////////////////////////////////
// Scharr

PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);

    cv::Mat src(size, type);
    fillRandom(src);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::GpuMat d_buf;

        cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);

        TEST_CYCLE()
        {
            cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::Scharr(src, dst, -1, 1, 0);

        TEST_CYCLE()
        {
            cv::Scharr(src, dst, -1, 1, 0);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// GaussianBlur

PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);
    int ksize = GET_PARAM(2);

    cv::Mat src(size, type);
    fillRandom(src);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::GpuMat d_buf;

        cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);

        TEST_CYCLE()
        {
            cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);

        TEST_CYCLE()
        {
            cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// Laplacian

PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);
    int ksize = GET_PARAM(2);

    cv::Mat src(size, type);
    fillRandom(src);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::Laplacian(d_src, d_dst, -1, ksize);

        TEST_CYCLE()
        {
            cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::Laplacian(src, dst, -1, ksize);

        TEST_CYCLE()
        {
            cv::Laplacian(src, dst, -1, ksize);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// Erode

PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);

    cv::Mat src(size, type);
    fillRandom(src);

    cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::GpuMat d_buf;

        cv::gpu::erode(d_src, d_dst, ker, d_buf);

        TEST_CYCLE()
        {
            cv::gpu::erode(d_src, d_dst, ker, d_buf);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::erode(src, dst, ker);

        TEST_CYCLE()
        {
            cv::erode(src, dst, ker);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// Dilate

PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);

    cv::Mat src(size, type);
    fillRandom(src);

    cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::GpuMat d_buf;

        cv::gpu::dilate(d_src, d_dst, ker, d_buf);

        TEST_CYCLE()
        {
            cv::gpu::dilate(d_src, d_dst, ker, d_buf);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::dilate(src, dst, ker);

        TEST_CYCLE()
        {
            cv::dilate(src, dst, ker);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// MorphologyEx

CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS ValuesIn(MorphOp::all())

DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);

PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), ALL_MORPH_OPS))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);
    int morphOp = GET_PARAM(2);

    cv::Mat src(size, type);
    fillRandom(src);

    cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;
        cv::gpu::GpuMat d_buf1;
        cv::gpu::GpuMat d_buf2;

        cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);

        TEST_CYCLE()
        {
            cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::morphologyEx(src, dst, morphOp, ker);

        TEST_CYCLE()
        {
            cv::morphologyEx(src, dst, morphOp, ker);
        }

        CPU_SANITY_CHECK(dst);
    }
}
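
// Editor's sketch (assumption): MORPH_OPEN above is the composition
// erode-then-dilate; writing it out with the same structuring element makes
// the equivalence explicit.
static cv::Mat exampleOpenByComposition(const cv::Mat& src, const cv::Mat& ker)
{
    cv::Mat eroded, opened;
    cv::erode(src, eroded, ker);     // remove small bright structures
    cv::dilate(eroded, opened, ker); // restore the surviving shapes
    return opened; // matches cv::morphologyEx(src, dst, cv::MORPH_OPEN, ker)
}
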
//////////////////////////////////////////////////////////////////////
// Filter2D

PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
    declare.time(20.0);

    cv::Size size = GET_PARAM(0);
    int type = GET_PARAM(1);
    int ksize = GET_PARAM(2);

    cv::Mat src(size, type);
    fillRandom(src);

    cv::Mat kernel(ksize, ksize, CV_32FC1);
    fillRandom(kernel, 0.0, 1.0);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        cv::gpu::filter2D(d_src, d_dst, -1, kernel);

        TEST_CYCLE()
        {
            cv::gpu::filter2D(d_src, d_dst, -1, kernel);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        cv::filter2D(src, dst, -1, kernel);

        TEST_CYCLE()
        {
            cv::filter2D(src, dst, -1, kernel);
        }

        CPU_SANITY_CHECK(dst);
    }
}

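// Editor's note (assumption, not part of the original file): filter2D cost
// grows with ksize*ksize; when the kernel is separable, sepFilter2D applies a
// row pass and a column pass instead, roughly 2*ksize work per pixel.
static cv::Mat exampleSeparableFilter(const cv::Mat& src, const cv::Mat& rowKernel, const cv::Mat& colKernel)
{
    cv::Mat dst;
    cv::sepFilter2D(src, dst, -1, rowKernel, colKernel); // kernelX, then kernelY
    return dst;
}
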
} // namespace
File diff suppressed because it is too large
@@ -32,8 +32,8 @@ struct GreedyLabeling
        return lo <= d && d <= hi;
    }

private:
    InInterval& operator=(const InInterval&);
};

@@ -1,185 +1,185 @@
#include "perf_precomp.hpp"

using namespace std;
using namespace testing;

namespace {

//////////////////////////////////////////////////////////////////////
// SetTo

PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
    cv::Size size = GET_PARAM(0);
    int depth = GET_PARAM(1);
    int channels = GET_PARAM(2);

    int type = CV_MAKE_TYPE(depth, channels);

    cv::Scalar val(1, 2, 3, 4);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(size, type);

        d_src.setTo(val);

        TEST_CYCLE()
        {
            d_src.setTo(val);
        }

        GPU_SANITY_CHECK(d_src);
    }
    else
    {
        cv::Mat src(size, type);

        src.setTo(val);

        TEST_CYCLE()
        {
            src.setTo(val);
        }

        CPU_SANITY_CHECK(src);
    }
}

//////////////////////////////////////////////////////////////////////
// SetToMasked

PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
    cv::Size size = GET_PARAM(0);
    int depth = GET_PARAM(1);
    int channels = GET_PARAM(2);

    int type = CV_MAKE_TYPE(depth, channels);

    cv::Mat src(size, type);
    fillRandom(src);

    cv::Mat mask(size, CV_8UC1);
    fillRandom(mask, 0, 2);

    cv::Scalar val(1, 2, 3, 4);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_mask(mask);

        d_src.setTo(val, d_mask);

        TEST_CYCLE()
        {
            d_src.setTo(val, d_mask);
        }

        GPU_SANITY_CHECK(d_src);
    }
    else
    {
        src.setTo(val, mask);

        TEST_CYCLE()
        {
            src.setTo(val, mask);
        }

        CPU_SANITY_CHECK(src);
    }
}
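
// Editor's sketch (assumption): setTo with a mask writes `val` only where the
// mask byte is non-zero, which is why the benchmark fills its mask with 0s
// and 1s. A concrete illustration on a hand-built mask:
static void exampleMaskedFill(cv::Mat& img)
{
    cv::Mat mask = cv::Mat::zeros(img.size(), CV_8UC1);
    cv::rectangle(mask, cv::Rect(0, 0, img.cols / 2, img.rows),
                  cv::Scalar::all(255), -1);   // mark the left half

    img.setTo(cv::Scalar(1, 2, 3, 4), mask);   // only the left half is overwritten
}
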
//////////////////////////////////////////////////////////////////////
// CopyToMasked

PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
    cv::Size size = GET_PARAM(0);
    int depth = GET_PARAM(1);
    int channels = GET_PARAM(2);

    int type = CV_MAKE_TYPE(depth, channels);

    cv::Mat src(size, type);
    fillRandom(src);

    cv::Mat mask(size, CV_8UC1);
    fillRandom(mask, 0, 2);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_mask(mask);
        cv::gpu::GpuMat d_dst;

        d_src.copyTo(d_dst, d_mask);

        TEST_CYCLE()
        {
            d_src.copyTo(d_dst, d_mask);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        src.copyTo(dst, mask);

        TEST_CYCLE()
        {
            src.copyTo(dst, mask);
        }

        CPU_SANITY_CHECK(dst);
    }
}

//////////////////////////////////////////////////////////////////////
// ConvertTo

DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);

PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
{
    cv::Size size = GET_PARAM(0);
    int depth1 = GET_PARAM(1);
    int depth2 = GET_PARAM(2);

    cv::Mat src(size, depth1);
    fillRandom(src);

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_dst;

        d_src.convertTo(d_dst, depth2, 0.5, 1.0);

        TEST_CYCLE()
        {
            d_src.convertTo(d_dst, depth2, 0.5, 1.0);
        }

        GPU_SANITY_CHECK(d_dst);
    }
    else
    {
        cv::Mat dst;

        src.convertTo(dst, depth2, 0.5, 1.0);

        TEST_CYCLE()
        {
            src.convertTo(dst, depth2, 0.5, 1.0);
        }

        CPU_SANITY_CHECK(dst);
    }
}

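// Editor's note (assumption): convertTo computes
// dst(x, y) = saturate_cast<dstDepth>(src(x, y) * alpha + beta), so the calls
// above halve every value and add one while changing the element depth.
static cv::Mat exampleConvertScaled(const cv::Mat& src, int dstDepth)
{
    cv::Mat dst;
    src.convertTo(dst, dstDepth, 0.5, 1.0); // alpha = 0.5, beta = 1.0
    return dst;
}
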
} // namespace
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SetTo
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Scalar val(1, 2, 3, 4);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(size, type);
|
||||
|
||||
d_src.setTo(val);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.setTo(val);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_src);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat src(size, type);
|
||||
|
||||
src.setTo(val);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.setTo(val);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(src);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SetToMasked
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
|
||||
cv::Mat mask(size, CV_8UC1);
|
||||
fillRandom(mask, 0, 2);
|
||||
|
||||
cv::Scalar val(1, 2, 3, 4);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_mask(mask);
|
||||
|
||||
d_src.setTo(val, d_mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.setTo(val, d_mask);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_src);
|
||||
}
|
||||
else
|
||||
{
|
||||
src.setTo(val, mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.setTo(val, mask);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(src);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// CopyToMasked
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth = GET_PARAM(1);
|
||||
int channels = GET_PARAM(2);
|
||||
|
||||
int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
fillRandom(src);
|
||||
|
||||
cv::Mat mask(size, CV_8UC1);
|
||||
fillRandom(mask, 0, 2);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_mask(mask);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
d_src.copyTo(d_dst, d_mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.copyTo(d_dst, d_mask);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
src.copyTo(dst, mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.copyTo(dst, mask);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ConvertTo
|
||||
|
||||
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
|
||||
|
||||
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
|
||||
{
|
||||
cv::Size size = GET_PARAM(0);
|
||||
int depth1 = GET_PARAM(1);
|
||||
int depth2 = GET_PARAM(2);
|
||||
|
||||
cv::Mat src(size, depth1);
|
||||
fillRandom(src);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_dst;
|
||||
|
||||
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
|
||||
}
|
||||
|
||||
GPU_SANITY_CHECK(d_dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
src.convertTo(dst, depth2, 0.5, 1.0);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
src.convertTo(dst, depth2, 0.5, 1.0);
|
||||
}
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
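Note: every test above follows the same shape: one untimed warm-up call (so allocation, upload and CUDA JIT costs stay out of the measurement), then the identical call inside TEST_CYCLE. A minimal standalone sketch of that pattern, timed with plain <chrono> rather than the ts perf framework (the size and iteration count are arbitrary):

// Warm-up-then-measure sketch for a GpuMat operation, assuming a CUDA device.
#include <chrono>
#include <iostream>
#include "opencv2/gpu/gpu.hpp"

int main()
{
    cv::Mat src(cv::Size(1920, 1080), CV_8UC1, cv::Scalar::all(7));
    cv::gpu::GpuMat d_src(src), d_dst;

    // Warm-up: excludes allocation and one-time driver/JIT overhead.
    d_src.convertTo(d_dst, CV_32F, 0.5, 1.0);

    const int iters = 100;
    std::chrono::steady_clock::time_point t0 = std::chrono::steady_clock::now();
    for (int i = 0; i < iters; ++i)
        d_src.convertTo(d_dst, CV_32F, 0.5, 1.0);   // synchronous (no stream)
    std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now();

    std::cout << std::chrono::duration<double, std::milli>(t1 - t0).count() / iters
              << " ms per convertTo\n";
    return 0;
}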
@@ -1,184 +1,184 @@
#include "perf_precomp.hpp"

using namespace std;
using namespace testing;

namespace {

///////////////////////////////////////////////////////////////
// HOG

DEF_PARAM_TEST_1(Image, string);

PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
{
    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    std::vector<cv::Rect> found_locations;

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_img(img);

        cv::gpu::HOGDescriptor d_hog;
        d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());

        d_hog.detectMultiScale(d_img, found_locations);

        TEST_CYCLE()
        {
            d_hog.detectMultiScale(d_img, found_locations);
        }
    }
    else
    {
        cv::HOGDescriptor hog;
        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());

        hog.detectMultiScale(img, found_locations);

        TEST_CYCLE()
        {
            hog.detectMultiScale(img, found_locations);
        }
    }

    SANITY_CHECK(found_locations);
}

//=========== test for CalTech data ===========//
DEF_PARAM_TEST_1(HOG, string);

PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
    "gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
    "gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
{
    cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    std::vector<cv::Rect> found_locations;

    if (PERF_RUN_GPU())
    {
        cv::gpu::GpuMat d_img(img);

        cv::gpu::HOGDescriptor d_hog;
        d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());

        d_hog.detectMultiScale(d_img, found_locations);

        TEST_CYCLE()
        {
            d_hog.detectMultiScale(d_img, found_locations);
        }
    }
    else
    {
        cv::HOGDescriptor hog;
        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());

        hog.detectMultiScale(img, found_locations);

        TEST_CYCLE()
        {
            hog.detectMultiScale(img, found_locations);
        }
    }

    SANITY_CHECK(found_locations);
}


///////////////////////////////////////////////////////////////
// HaarClassifier

typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);

PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
    Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
    cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    if (PERF_RUN_GPU())
    {
        cv::gpu::CascadeClassifier_GPU d_cascade;
        ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));

        cv::gpu::GpuMat d_img(img);
        cv::gpu::GpuMat d_objects_buffer;

        d_cascade.detectMultiScale(d_img, d_objects_buffer);

        TEST_CYCLE()
        {
            d_cascade.detectMultiScale(d_img, d_objects_buffer);
        }

        GPU_SANITY_CHECK(d_objects_buffer);
    }
    else
    {
        cv::CascadeClassifier cascade;
        ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));

        std::vector<cv::Rect> rects;

        cascade.detectMultiScale(img, rects);

        TEST_CYCLE()
        {
            cascade.detectMultiScale(img, rects);
        }

        CPU_SANITY_CHECK(rects);
    }
}

///////////////////////////////////////////////////////////////
// LBP cascade

PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
    Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
    cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    if (PERF_RUN_GPU())
    {
        cv::gpu::CascadeClassifier_GPU d_cascade;
        ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));

        cv::gpu::GpuMat d_img(img);
        cv::gpu::GpuMat d_gpu_rects;

        d_cascade.detectMultiScale(d_img, d_gpu_rects);

        TEST_CYCLE()
        {
            d_cascade.detectMultiScale(d_img, d_gpu_rects);
        }

        GPU_SANITY_CHECK(d_gpu_rects);
    }
    else
    {
        cv::CascadeClassifier cascade;
        ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));

        std::vector<cv::Rect> rects;

        cascade.detectMultiScale(img, rects);

        TEST_CYCLE()
        {
            cascade.detectMultiScale(img, rects);
        }

        CPU_SANITY_CHECK(rects);
    }
}

} // namespace
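Note: the CPU branch of the HOG tests above is the stock OpenCV pedestrian-detection flow. A minimal standalone sketch of the same flow (the image path is a placeholder):

// Standalone CPU HOG pedestrian detection, mirroring the test's else-branch.
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <vector>
#include <cstdio>

int main()
{
    // "road.png" stands in for any grayscale test image on disk.
    cv::Mat img = cv::imread("road.png", cv::IMREAD_GRAYSCALE);
    if (img.empty())
        return 1;

    cv::HOGDescriptor hog;
    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());

    std::vector<cv::Rect> found;
    hog.detectMultiScale(img, found);   // default scale/stride parameters

    for (size_t i = 0; i < found.size(); ++i)
        std::printf("person at (%d, %d) %dx%d\n",
                    found[i].x, found[i].y, found[i].width, found[i].height);
    return 0;
}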
@@ -1,37 +1,37 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# pragma GCC diagnostic ignored "-Wmissing-prototypes" //OSX
#endif

#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__

#include <cstdio>
#include <iostream>

#include "cvconfig.h"

#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#endif

#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
#include "opencv2/photo/photo.hpp"

#include "utility.hpp"

#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif

#endif
File diff suppressed because it is too large
@@ -1,191 +1,191 @@
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
void fillRandom(Mat& m, double a, double b)
|
||||
{
|
||||
RNG rng(123456789);
|
||||
rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
|
||||
}
|
||||
|
||||
Mat readImage(const string& fileName, int flags)
|
||||
{
|
||||
return imread(perf::TestBase::getDataPath(fileName), flags);
|
||||
}
|
||||
|
||||
void PrintTo(const CvtColorInfo& info, ostream* os)
|
||||
{
|
||||
static const char* str[] =
|
||||
{
|
||||
"BGR2BGRA",
|
||||
"BGRA2BGR",
|
||||
"BGR2RGBA",
|
||||
"RGBA2BGR",
|
||||
"BGR2RGB",
|
||||
"BGRA2RGBA",
|
||||
|
||||
"BGR2GRAY",
|
||||
"RGB2GRAY",
|
||||
"GRAY2BGR",
|
||||
"GRAY2BGRA",
|
||||
"BGRA2GRAY",
|
||||
"RGBA2GRAY",
|
||||
|
||||
"BGR2BGR565",
|
||||
"RGB2BGR565",
|
||||
"BGR5652BGR",
|
||||
"BGR5652RGB",
|
||||
"BGRA2BGR565",
|
||||
"RGBA2BGR565",
|
||||
"BGR5652BGRA",
|
||||
"BGR5652RGBA",
|
||||
|
||||
"GRAY2BGR565",
|
||||
"BGR5652GRAY",
|
||||
|
||||
"BGR2BGR555",
|
||||
"RGB2BGR555",
|
||||
"BGR5552BGR",
|
||||
"BGR5552RGB",
|
||||
"BGRA2BGR555",
|
||||
"RGBA2BGR555",
|
||||
"BGR5552BGRA",
|
||||
"BGR5552RGBA",
|
||||
|
||||
"GRAY2BGR555",
|
||||
"BGR5552GRAY",
|
||||
|
||||
"BGR2XYZ",
|
||||
"RGB2XYZ",
|
||||
"XYZ2BGR",
|
||||
"XYZ2RGB",
|
||||
|
||||
"BGR2YCrCb",
|
||||
"RGB2YCrCb",
|
||||
"YCrCb2BGR",
|
||||
"YCrCb2RGB",
|
||||
|
||||
"BGR2HSV",
|
||||
"RGB2HSV",
|
||||
|
||||
"",
|
||||
"",
|
||||
|
||||
"BGR2Lab",
|
||||
"RGB2Lab",
|
||||
|
||||
"BayerBG2BGR",
|
||||
"BayerGB2BGR",
|
||||
"BayerRG2BGR",
|
||||
"BayerGR2BGR",
|
||||
|
||||
"BGR2Luv",
|
||||
"RGB2Luv",
|
||||
|
||||
"BGR2HLS",
|
||||
"RGB2HLS",
|
||||
|
||||
"HSV2BGR",
|
||||
"HSV2RGB",
|
||||
|
||||
"Lab2BGR",
|
||||
"Lab2RGB",
|
||||
"Luv2BGR",
|
||||
"Luv2RGB",
|
||||
|
||||
"HLS2BGR",
|
||||
"HLS2RGB",
|
||||
|
||||
"BayerBG2BGR_VNG",
|
||||
"BayerGB2BGR_VNG",
|
||||
"BayerRG2BGR_VNG",
|
||||
"BayerGR2BGR_VNG",
|
||||
|
||||
"BGR2HSV_FULL",
|
||||
"RGB2HSV_FULL",
|
||||
"BGR2HLS_FULL",
|
||||
"RGB2HLS_FULL",
|
||||
|
||||
"HSV2BGR_FULL",
|
||||
"HSV2RGB_FULL",
|
||||
"HLS2BGR_FULL",
|
||||
"HLS2RGB_FULL",
|
||||
|
||||
"LBGR2Lab",
|
||||
"LRGB2Lab",
|
||||
"LBGR2Luv",
|
||||
"LRGB2Luv",
|
||||
|
||||
"Lab2LBGR",
|
||||
"Lab2LRGB",
|
||||
"Luv2LBGR",
|
||||
"Luv2LRGB",
|
||||
|
||||
"BGR2YUV",
|
||||
"RGB2YUV",
|
||||
"YUV2BGR",
|
||||
"YUV2RGB",
|
||||
|
||||
"BayerBG2GRAY",
|
||||
"BayerGB2GRAY",
|
||||
"BayerRG2GRAY",
|
||||
"BayerGR2GRAY",
|
||||
|
||||
//YUV 4:2:0 formats family
|
||||
"YUV2RGB_NV12",
|
||||
"YUV2BGR_NV12",
|
||||
"YUV2RGB_NV21",
|
||||
"YUV2BGR_NV21",
|
||||
|
||||
"YUV2RGBA_NV12",
|
||||
"YUV2BGRA_NV12",
|
||||
"YUV2RGBA_NV21",
|
||||
"YUV2BGRA_NV21",
|
||||
|
||||
"YUV2RGB_YV12",
|
||||
"YUV2BGR_YV12",
|
||||
"YUV2RGB_IYUV",
|
||||
"YUV2BGR_IYUV",
|
||||
|
||||
"YUV2RGBA_YV12",
|
||||
"YUV2BGRA_YV12",
|
||||
"YUV2RGBA_IYUV",
|
||||
"YUV2BGRA_IYUV",
|
||||
|
||||
"YUV2GRAY_420",
|
||||
|
||||
//YUV 4:2:2 formats family
|
||||
"YUV2RGB_UYVY",
|
||||
"YUV2BGR_UYVY",
|
||||
"YUV2RGB_VYUY",
|
||||
"YUV2BGR_VYUY",
|
||||
|
||||
"YUV2RGBA_UYVY",
|
||||
"YUV2BGRA_UYVY",
|
||||
"YUV2RGBA_VYUY",
|
||||
"YUV2BGRA_VYUY",
|
||||
|
||||
"YUV2RGB_YUY2",
|
||||
"YUV2BGR_YUY2",
|
||||
"YUV2RGB_YVYU",
|
||||
"YUV2BGR_YVYU",
|
||||
|
||||
"YUV2RGBA_YUY2",
|
||||
"YUV2BGRA_YUY2",
|
||||
"YUV2RGBA_YVYU",
|
||||
"YUV2BGRA_YVYU",
|
||||
|
||||
"YUV2GRAY_UYVY",
|
||||
"YUV2GRAY_YUY2",
|
||||
|
||||
// alpha premultiplication
|
||||
"RGBA2mRGBA",
|
||||
"mRGBA2RGBA",
|
||||
|
||||
"COLORCVT_MAX"
|
||||
};
|
||||
|
||||
*os << str[info.code];
|
||||
}
|
||||
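Note: PrintTo is the Google Test customization point used here: when a free PrintTo(const T&, std::ostream*) is visible for a parameter type, gtest calls it to render the value in test names and failure output instead of dumping raw bytes. A small self-contained sketch (the type and values are illustrative, not from this codebase):

// How gtest picks up PrintTo for a parameterized test's value type.
#include <ostream>
#include <gtest/gtest.h>

struct KernelSize
{
    int width;
    explicit KernelSize(int w) : width(w) {}
};

// Found via argument-dependent lookup; used when printing parameters.
void PrintTo(const KernelSize& k, std::ostream* os)
{
    *os << k.width << "x" << k.width;   // e.g. "3x3"
}

class BlurTest : public ::testing::TestWithParam<KernelSize> {};

TEST_P(BlurTest, Runs) { SUCCEED(); }

INSTANTIATE_TEST_CASE_P(Kernels, BlurTest,
                        ::testing::Values(KernelSize(3), KernelSize(5)));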
@@ -1,84 +1,84 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__

#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ts/ts_perf.hpp"

void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);

using perf::MatType;
using perf::MatDepth;

CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())

CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())

CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)

const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4;
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
#define GPU_CHANNELS_1_3_4 testing::Values(Gray, BGR, BGRA)
#define GPU_CHANNELS_1_3 testing::Values(Gray, BGR)

struct CvtColorInfo
{
    int scn;
    int dcn;
    int code;

    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);

#define GET_PARAM(k) std::tr1::get< k >(GetParam())

#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name

DEF_PARAM_TEST_1(Sz, cv::Size);
typedef perf::Size_MatType Sz_Type;
DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth);
DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);

#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)

#define GPU_SANITY_CHECK(dmat, ...) \
    do{ \
        cv::Mat d##dmat(dmat); \
        SANITY_CHECK(d##dmat, ## __VA_ARGS__); \
    } while(0)

#define CPU_SANITY_CHECK(cmat, ...) \
    do{ \
        SANITY_CHECK(cmat, ## __VA_ARGS__); \
    } while(0)

#define GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...) \
    do{ \
        cv::Mat d##dmat(dmat); \
        cv::Mat __pt_x = d##dmat.row(cv::gpu::alg##_GPU::X_ROW); \
        cv::Mat __pt_y = d##dmat.row(cv::gpu::alg##_GPU::Y_ROW); \
        cv::Mat __angle = d##dmat.row(cv::gpu::alg##_GPU::ANGLE_ROW); \
        cv::Mat __octave = d##dmat.row(cv::gpu::alg##_GPU::OCTAVE_ROW); \
        cv::Mat __size = d##dmat.row(cv::gpu::alg##_GPU::SIZE_ROW); \
        ::perf::Regression::add(this, std::string(#dmat) + "-pt-x-row", __pt_x, ## __VA_ARGS__); \
        ::perf::Regression::add(this, std::string(#dmat) + "-pt-y-row", __pt_y, ## __VA_ARGS__); \
        ::perf::Regression::add(this, std::string(#dmat) + "-angle-row", __angle, ## __VA_ARGS__); \
        ::perf::Regression::add(this, std::string(#dmat) + "-octave-row", __octave, ## __VA_ARGS__); \
        ::perf::Regression::add(this, std::string(#dmat) + "-pt-size-row", __size, ## __VA_ARGS__); \
    } while(0)

#define GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...) \
    do{ \
        cv::Mat d##dmat(dmat); \
        cv::Mat __response = d##dmat.row(cv::gpu::alg##_GPU::RESPONSE_ROW); \
        ::perf::Regression::add(this, std::string(#dmat) + "-response-row", __response, ## __VA_ARGS__); \
    } while(0)

#define FAIL_NO_CPU() FAIL() << "No CPU analog for this GPU implementation"

#endif // __OPENCV_PERF_GPU_UTILITY_HPP__
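Note: GPU_SANITY_CHECK works because cv::Mat has a converting constructor taking a cv::gpu::GpuMat, which downloads the device buffer to the host before SANITY_CHECK compares it with the stored baseline. A minimal sketch of that round trip (requires a CUDA device):

// Upload-then-download round trip: the same mechanism GPU_SANITY_CHECK relies on.
#include <iostream>
#include "opencv2/gpu/gpu.hpp"

int main()
{
    cv::Mat host(4, 4, CV_32F, cv::Scalar::all(2.5f));
    cv::gpu::GpuMat device(host);   // constructor uploads to the GPU

    cv::Mat roundTrip(device);      // constructor downloads back to the host
    std::cout << "identical: "
              << (cv::countNonZero(host != roundTrip) == 0) << "\n";
    return 0;
}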
File diff suppressed because it is too large
@@ -1,158 +1,158 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;
using namespace std;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_nogpu(); }
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_nogpu(); }

void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }

#else /* !defined (HAVE_CUDA) */

namespace cv { namespace gpu { namespace device
{
    namespace disp_bilateral_filter
    {
        void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc);

        template<typename T>
        void disp_bilateral_filter(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
    }
}}}

using namespace ::cv::gpu::device::disp_bilateral_filter;

namespace
{
    const float DEFAULT_EDGE_THRESHOLD = 0.1f;
    const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f;
    const float DEFAULT_SIGMA_RANGE = 10.0f;

    inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len)
    {
        Mat cpu_table_color(1, len, CV_32F);

        float* line = cpu_table_color.ptr<float>();

        // Gaussian range weight: exp(-i^2 / (2 * sigma_range^2))
        for(int i = 0; i < len; i++)
            line[i] = static_cast<float>(std::exp(-double(i * i) / (2 * sigma_range * sigma_range)));

        table_color.upload(cpu_table_color);
    }

    inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space)
    {
        int half = (win_size >> 1);

        Mat cpu_table_space(half + 1, half + 1, CV_32F);

        for (int y = 0; y <= half; ++y)
        {
            float* row = cpu_table_space.ptr<float>(y);
            for (int x = 0; x <= half; ++x)
                row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space);
        }

        table_space.upload(cpu_table_space);
    }

    template <typename T>
    void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
                                        GpuMat& table_color, GpuMat& table_space,
                                        const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
    {
        short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
        short max_disc = short(ndisp * max_disc_threshold + 0.5);

        disp_load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);

        if (&dst != &disp)
        {
            if (stream)
                stream.enqueueCopy(disp, dst);
            else
                disp.copyTo(dst);
        }

        disp_bilateral_filter<T>(dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
    }

    typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
                                                GpuMat& table_color, GpuMat& table_space,
                                                const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream);

    // Indexed by cv::Mat depth: CV_8U (0) and CV_16S (3) are supported; the rest are null.
    const bilateral_filter_operator_t operators[] =
        {disp_bilateral_filter_operator<unsigned char>, 0, 0, disp_bilateral_filter_operator<short>, 0, 0, 0, 0};
}

cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_)
    : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD),
      sigma_range(DEFAULT_SIGMA_RANGE)
{
    calc_color_weighted_table(table_color, sigma_range, 255);
    calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
}

cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_,
                                                            float max_disc_threshold_, float sigma_range_)
    : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_),
      sigma_range(sigma_range_)
{
    calc_color_weighted_table(table_color, sigma_range, 255);
    calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
}

void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
{
    CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
    CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
    operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream);
}

#endif /* !defined (HAVE_CUDA) */
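Note: a hedged usage sketch for the filter above: a raw block-matching disparity map is refined edge-aware, using the left image as guidance. File names are placeholders, and the filter's ndisp must match the matcher's:

// Refine a StereoBM_GPU disparity with DisparityBilateralFilter (sketch).
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"

int main()
{
    // Placeholder names; any rectified grayscale stereo pair works.
    cv::Mat left  = cv::imread("left.png",  cv::IMREAD_GRAYSCALE);
    cv::Mat right = cv::imread("right.png", cv::IMREAD_GRAYSCALE);
    if (left.empty() || right.empty())
        return 1;

    cv::gpu::GpuMat d_left(left), d_right(right), d_disp, d_refined;

    cv::gpu::StereoBM_GPU bm(cv::gpu::StereoBM_GPU::BASIC_PRESET, 64);
    bm(d_left, d_right, d_disp);                // raw CV_8U disparity

    cv::gpu::DisparityBilateralFilter dbf(64);  // ndisp matches the matcher
    dbf(d_disp, d_left, d_refined);             // edge-aware refinement

    cv::imwrite("disparity.png", cv::Mat(d_refined));
    return 0;
}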
@@ -1,100 +1,100 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace std;
using namespace cv;
using namespace cv::gpu;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }

#else

namespace cv { namespace gpu { namespace device
{
    namespace blend
    {
        template <typename T>
        void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);

        void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
    }
}}}

using namespace ::cv::gpu::device::blend;

void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
                          GpuMat& result, Stream& stream)
{
    CV_Assert(img1.size() == img2.size());
    CV_Assert(img1.type() == img2.type());
    CV_Assert(weights1.size() == img1.size());
    CV_Assert(weights2.size() == img2.size());
    CV_Assert(weights1.type() == CV_32F);
    CV_Assert(weights2.type() == CV_32F);

    const Size size = img1.size();
    const int depth = img1.depth();
    const int cn = img1.channels();

    result.create(size, CV_MAKE_TYPE(depth, cn));

    switch (depth)
    {
    case CV_8U:
        if (cn != 4)
            blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        else
            blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        break;
    case CV_32F:
        blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
        break;
    default:
        CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function");
    }
}

#endif
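Note: blendLinear computes result(y,x) = img1(y,x) * weights1(y,x) + img2(y,x) * weights2(y,x) per pixel. A minimal cross-fade sketch grounded in the asserts above (equal sizes and types, CV_32F weight maps; requires a CUDA device):

// Horizontal cross-fade between two solid-color images via cv::gpu::blendLinear.
#include "opencv2/gpu/gpu.hpp"

int main()
{
    const cv::Size sz(640, 480);
    cv::Mat img1(sz, CV_8UC3, cv::Scalar(255, 0, 0));
    cv::Mat img2(sz, CV_8UC3, cv::Scalar(0, 0, 255));

    // w1 ramps 1 -> 0 left to right, w2 ramps 0 -> 1.
    cv::Mat w1(sz, CV_32F), w2(sz, CV_32F);
    for (int x = 0; x < sz.width; ++x)
    {
        w1.col(x).setTo(1.0f - float(x) / sz.width);
        w2.col(x).setTo(float(x) / sz.width);
    }

    cv::gpu::GpuMat d_img1(img1), d_img2(img2), d_w1(w1), d_w2(w2), d_result;
    cv::gpu::blendLinear(d_img1, d_img2, d_w1, d_w2, d_result);

    cv::Mat result(d_result);   // download the blended image
    return 0;
}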
File diff suppressed because it is too large
@@ -1,295 +1,295 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
using namespace std;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, vector<int>*) { throw_nogpu(); }
|
||||
|
||||
#else
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace transform_points
|
||||
{
|
||||
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, PtrStepSz<float3> dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace project_points
|
||||
{
|
||||
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, const float* proj, PtrStepSz<float2> dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace solve_pnp_ransac
|
||||
{
|
||||
int maxNumIters();
|
||||
|
||||
void computeHypothesisScores(
|
||||
const int num_hypotheses, const int num_points, const float* rot_matrices,
|
||||
const float3* transl_vectors, const float3* object, const float2* image,
|
||||
const float dist_threshold, int* hypothesis_scores);
|
||||
}
|
||||
}}}
|
||||
|
||||
using namespace ::cv::gpu::device;
|
||||
|
||||
namespace
|
||||
{
|
||||
void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
|
||||
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
|
||||
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
|
||||
|
||||
// Convert rotation vector into matrix
|
||||
Mat rot;
|
||||
Rodrigues(rvec, rot);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
transform_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), dst, stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
|
||||
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
|
||||
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
|
||||
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
|
||||
CV_Assert(dist_coef.empty()); // Undistortion isn't supported
|
||||
|
||||
// Convert rotation vector into matrix
|
||||
Mat rot;
|
||||
Rodrigues(rvec, rot);
|
||||
|
||||
dst.create(src.size(), CV_32FC2);
|
||||
project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst,stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Selects subset_size random different points from [0, num_points - 1] range
|
||||
void selectRandom(int subset_size, int num_points, vector<int>& subset)
|
||||
{
|
||||
subset.resize(subset_size);
|
||||
for (int i = 0; i < subset_size; ++i)
|
||||
{
|
||||
bool was;
|
||||
do
|
||||
{
|
||||
subset[i] = rand() % num_points;
|
||||
was = false;
|
||||
for (int j = 0; j < i; ++j)
|
||||
if (subset[j] == subset[i])
|
||||
{
|
||||
was = true;
|
||||
break;
|
||||
}
|
||||
} while (was);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes rotation, translation pair for small subsets if the input data
|
||||
class TransformHypothesesGenerator
|
||||
{
|
||||
public:
|
||||
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
|
||||
const Mat& camera_mat_, int num_points_, int subset_size_,
|
||||
Mat rot_matrices_, Mat transl_vectors_)
|
||||
: object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_),
|
||||
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
|
||||
transl_vectors(transl_vectors_) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
{
|
||||
// Input data for generation of the current hypothesis
|
||||
vector<int> subset_indices(subset_size);
|
||||
Mat_<Point3f> object_subset(1, subset_size);
|
||||
Mat_<Point2f> image_subset(1, subset_size);
|
||||
|
||||
// Current hypothesis data
|
||||
Mat rot_vec(1, 3, CV_64F);
|
||||
Mat rot_mat(3, 3, CV_64F);
|
||||
Mat transl_vec(1, 3, CV_64F);
|
||||
|
||||
for (int iter = range.begin(); iter < range.end(); ++iter)
|
||||
{
|
||||
selectRandom(subset_size, num_points, subset_indices);
|
||||
for (int i = 0; i < subset_size; ++i)
|
||||
{
|
||||
object_subset(0, i) = object->at<Point3f>(subset_indices[i]);
|
||||
image_subset(0, i) = image->at<Point2f>(subset_indices[i]);
|
||||
}
|
||||
|
||||
solvePnP(object_subset, image_subset, *camera_mat, *dist_coef, rot_vec, transl_vec);
|
||||
|
||||
// Remember translation vector
|
||||
Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
|
||||
transl_vec = transl_vec.reshape(0, 1);
|
||||
transl_vec.convertTo(transl_vec_, CV_32F);
|
||||
|
||||
// Remember rotation matrix
|
||||
Rodrigues(rot_vec, rot_mat);
|
||||
Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
|
||||
rot_mat.convertTo(rot_mat_, CV_32F);
|
||||
}
|
||||
}
|
||||
|
||||
const Mat* object;
|
||||
const Mat* image;
|
||||
const Mat* dist_coef;
|
||||
const Mat* camera_mat;
|
||||
int num_points;
|
||||
int subset_size;
|
||||
|
||||
// Hypotheses storage (global)
|
||||
Mat rot_matrices;
|
||||
Mat transl_vectors;
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
||||
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
|
||||
int num_iters, float max_dist, int min_inlier_count,
|
||||
vector<int>* inliers)
|
||||
{
|
||||
(void)min_inlier_count;
|
||||
CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
|
||||
CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
|
||||
CV_Assert(object.cols == image.cols);
|
||||
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
|
||||
CV_Assert(!use_extrinsic_guess); // We don't support initial guess for now
|
||||
CV_Assert(num_iters <= solve_pnp_ransac::maxNumIters());
|
||||
|
||||
const int subset_size = 4;
|
||||
const int num_points = object.cols;
|
||||
CV_Assert(num_points >= subset_size);
|
||||
|
||||
// Unapply distortion and intrinsic camera transformations
|
||||
Mat eye_camera_mat = Mat::eye(3, 3, CV_32F);
|
||||
Mat empty_dist_coef;
|
||||
Mat image_normalized;
|
||||
undistortPoints(image, image_normalized, camera_mat, dist_coef, Mat(), eye_camera_mat);
|
||||
|
||||
// Hypotheses storage (global)
|
||||
Mat rot_matrices(1, num_iters * 9, CV_32F);
|
||||
Mat transl_vectors(1, num_iters * 3, CV_32F);
|
||||
|
||||
// Generate set of hypotheses using small subsets of the input data
|
||||
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
|
||||
num_points, subset_size, rot_matrices, transl_vectors);
|
||||
parallel_for(BlockedRange(0, num_iters), body);
|
||||
|
||||
// Compute scores (i.e. number of inliers) for each hypothesis
|
||||
GpuMat d_object(object);
|
||||
GpuMat d_image_normalized(image_normalized);
|
||||
GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
|
||||
solve_pnp_ransac::computeHypothesisScores(
|
||||
num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
|
||||
d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist,
|
||||
d_hypothesis_scores.ptr<int>());
|
||||
|
||||
// Find the best hypothesis index
|
||||
Point best_idx;
|
||||
double best_score;
|
||||
minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
|
||||
int num_inliers = static_cast<int>(best_score);
|
||||
|
||||
// Extract the best hypothesis data
|
||||
|
||||
Mat rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
|
||||
Rodrigues(rot_mat, rvec);
|
||||
rvec = rvec.reshape(0, 1);
|
||||
|
||||
tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
|
||||
tvec = tvec.reshape(0, 1);
|
||||
|
||||
// Build vector of inlier indices
|
||||
if (inliers != NULL)
|
||||
{
|
||||
inliers->clear();
|
||||
inliers->reserve(num_inliers);
|
||||
|
||||
Point3f p, p_transf;
|
||||
Point2f p_proj;
|
||||
const float* rot = rot_mat.ptr<float>();
|
||||
const float* transl = tvec.ptr<float>();
|
||||
|
||||
for (int i = 0; i < num_points; ++i)
|
||||
{
|
||||
p = object.at<Point3f>(0, i);
|
||||
p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
|
||||
p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
|
||||
p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
|
||||
p_proj.x = p_transf.x / p_transf.z;
|
||||
p_proj.y = p_transf.y / p_transf.z;
|
||||
if (norm(p_proj - image_normalized.at<Point2f>(0, i)) < max_dist)
|
||||
inliers->push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
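For reference, a minimal host-side sketch of driving this RANSAC pipeline (row vectors of CV_32FC3 object points and CV_32FC2 image points, as the asserts above require; the iteration count and threshold values are illustrative, and the distance test is applied against reprojections of the normalized image points, per the inlier loop above):

    #include <opencv2/gpu/gpu.hpp>
    #include <vector>

    void pnp_demo(const cv::Mat& object_pts,   // 1 x N, CV_32FC3
                  const cv::Mat& image_pts,    // 1 x N, CV_32FC2
                  const cv::Mat& camera_mat)   // 3 x 3, CV_32F
    {
        cv::Mat rvec, tvec;
        std::vector<int> inliers;

        // Hypotheses are generated on the CPU (parallel_for over solvePnP
        // on 4-point subsets) and scored on the GPU, as implemented above.
        // min_inlier_count is ignored by this implementation, as the
        // (void) cast above shows.
        cv::gpu::solvePnPRansac(object_pts, image_pts, camera_mat, cv::Mat(),
                                rvec, tvec, false /*use_extrinsic_guess*/,
                                200 /*num_iters*/, 8.0f /*max_dist*/,
                                100 /*min_inlier_count*/, &inliers);
    }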
File diff suppressed because it is too large
@@ -1,139 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "cu_safe_call.h"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
|
||||
namespace
|
||||
{
|
||||
#define error_entry(entry) { entry, #entry }
|
||||
|
||||
struct ErrorEntry
|
||||
{
|
||||
int code;
|
||||
std::string str;
|
||||
};
|
||||
|
||||
class ErrorEntryComparer
|
||||
{
|
||||
public:
|
||||
inline ErrorEntryComparer(int code) : code_(code) {}
|
||||
|
||||
inline bool operator()(const ErrorEntry& e) const { return e.code == code_; }
|
||||
|
||||
private:
|
||||
int code_;
|
||||
};
|
||||
|
||||
std::string getErrorString(int code, const ErrorEntry* errors, size_t n)
|
||||
{
|
||||
size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
|
||||
|
||||
const std::string& msg = (idx != n) ? errors[idx].str : std::string("Unknown error code");
|
||||
|
||||
std::ostringstream ostr;
|
||||
ostr << msg << " [Code = " << code << "]";
|
||||
|
||||
return ostr.str();
|
||||
}
|
||||
|
||||
const ErrorEntry cu_errors [] =
|
||||
{
|
||||
error_entry( CUDA_SUCCESS ),
|
||||
error_entry( CUDA_ERROR_INVALID_VALUE ),
|
||||
error_entry( CUDA_ERROR_OUT_OF_MEMORY ),
|
||||
error_entry( CUDA_ERROR_NOT_INITIALIZED ),
|
||||
error_entry( CUDA_ERROR_DEINITIALIZED ),
|
||||
error_entry( CUDA_ERROR_PROFILER_DISABLED ),
|
||||
error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ),
|
||||
error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ),
|
||||
error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ),
|
||||
error_entry( CUDA_ERROR_NO_DEVICE ),
|
||||
error_entry( CUDA_ERROR_INVALID_DEVICE ),
|
||||
error_entry( CUDA_ERROR_INVALID_IMAGE ),
|
||||
error_entry( CUDA_ERROR_INVALID_CONTEXT ),
|
||||
error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ),
|
||||
error_entry( CUDA_ERROR_MAP_FAILED ),
|
||||
error_entry( CUDA_ERROR_UNMAP_FAILED ),
|
||||
error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ),
|
||||
error_entry( CUDA_ERROR_ALREADY_MAPPED ),
|
||||
error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ),
|
||||
error_entry( CUDA_ERROR_ALREADY_ACQUIRED ),
|
||||
error_entry( CUDA_ERROR_NOT_MAPPED ),
|
||||
error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ),
|
||||
error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ),
|
||||
error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ),
|
||||
error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ),
|
||||
error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ),
|
||||
error_entry( CUDA_ERROR_INVALID_SOURCE ),
|
||||
error_entry( CUDA_ERROR_FILE_NOT_FOUND ),
|
||||
error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
|
||||
error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ),
|
||||
error_entry( CUDA_ERROR_OPERATING_SYSTEM ),
|
||||
error_entry( CUDA_ERROR_INVALID_HANDLE ),
|
||||
error_entry( CUDA_ERROR_NOT_FOUND ),
|
||||
error_entry( CUDA_ERROR_NOT_READY ),
|
||||
error_entry( CUDA_ERROR_LAUNCH_FAILED ),
|
||||
error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ),
|
||||
error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ),
|
||||
error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ),
|
||||
error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ),
|
||||
error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ),
|
||||
error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ),
|
||||
error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ),
|
||||
error_entry( CUDA_ERROR_ASSERT ),
|
||||
error_entry( CUDA_ERROR_TOO_MANY_PEERS ),
|
||||
error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
|
||||
error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ),
|
||||
error_entry( CUDA_ERROR_UNKNOWN )
|
||||
};
|
||||
|
||||
const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
|
||||
}
|
||||
|
||||
std::string cv::gpu::detail::cuGetErrString(CUresult res)
|
||||
{
|
||||
return getErrorString(res, cu_errors, cu_errors_num);
|
||||
}
|
||||
|
||||
#endif // HAVE_CUDA
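The error_entry macro above leans on the preprocessor's stringizing operator: { entry, #entry } records each enum value together with its own spelling, so the lookup table can never drift from the names. A minimal standalone sketch of the same technique (plain C++, no CUDA required; MyError and its codes are made up for illustration):

    #include <iostream>
    #include <sstream>
    #include <string>

    enum MyError { ERR_OK = 0, ERR_IO = 5, ERR_OOM = 7 };  // hypothetical codes

    #define error_entry(e) { e, #e }  // value plus its stringized name

    struct ErrorEntry { int code; std::string str; };

    const ErrorEntry table[] = { error_entry(ERR_OK), error_entry(ERR_IO), error_entry(ERR_OOM) };
    const size_t table_n = sizeof(table) / sizeof(table[0]);

    std::string errString(int code)
    {
        size_t i = 0;
        while (i < table_n && table[i].code != code) ++i;  // linear scan, as above
        std::ostringstream ostr;
        ostr << (i != table_n ? table[i].str : std::string("Unknown error code"))
             << " [Code = " << code << "]";
        return ostr.str();
    }

    int main() { std::cout << errString(7) << std::endl; }  // prints "ERR_OOM [Code = 7]"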
@@ -1,67 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __CU_SAFE_CALL_H__
#define __CU_SAFE_CALL_H__

#include "precomp.hpp"

#ifdef HAVE_CUDA

namespace cv { namespace gpu {
    namespace detail
    {
        std::string cuGetErrString(CUresult res);

        inline void cuSafeCall_impl(CUresult res, const char* file, int line)
        {
            if (res != CUDA_SUCCESS)
                cv::error( cv::Exception(CV_GpuApiCallError, cuGetErrString(res), "unknown function", file, line) );
        }
    }
}}

#define cuSafeCall( op ) cv::gpu::detail::cuSafeCall_impl( (op), __FILE__, __LINE__ )

#endif // HAVE_CUDA

#endif // __CU_SAFE_CALL_H__
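Usage is a one-liner around any CUDA driver API call; because the macro captures __FILE__ and __LINE__ at the call site, the cv::Exception it raises points at the failing line. A short sketch (assumes it is compiled inside this module, where precomp.hpp is on the include path and CUDA is available):

    #include <cuda.h>
    #include "cu_safe_call.h"

    void init_driver_api()
    {
        // Each driver call returns a CUresult; anything other than
        // CUDA_SUCCESS is translated by cuGetErrString and rethrown
        // as a cv::Exception with file/line context.
        cuSafeCall( cuInit(0) );

        int device_count = 0;
        cuSafeCall( cuDeviceGetCount(&device_count) );
    }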
@@ -1,212 +1,212 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*
 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

/*
    NV12ToARGB color space conversion CUDA kernel

    This sample uses CUDA to convert a source image in NV12 (YUV 4:2:0 planar)
    format into ARGB output.
*/

#if !defined CUDA_DISABLER

#include "opencv2/gpu/device/common.hpp"

namespace cv { namespace gpu { namespace device {
    namespace video_decoding
    {
        __constant__ uint constAlpha = ((uint)0xff << 24);

        __constant__ float constHueColorSpaceMat[9];

        void loadHueCSC(float hueCSC[9])
        {
            cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
        }

        __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
        {
            float luma, chromaCb, chromaCr;

            // Prepare for hue adjustment
            luma     = (float)yuvi[0];
            chromaCb = (float)((int)yuvi[1] - 512.0f);
            chromaCr = (float)((int)yuvi[2] - 512.0f);

            // Convert YUV To RGB with hue adjustment
            *red   = (luma     * constHueColorSpaceMat[0]) +
                     (chromaCb * constHueColorSpaceMat[1]) +
                     (chromaCr * constHueColorSpaceMat[2]);

            *green = (luma     * constHueColorSpaceMat[3]) +
                     (chromaCb * constHueColorSpaceMat[4]) +
                     (chromaCr * constHueColorSpaceMat[5]);

            *blue  = (luma     * constHueColorSpaceMat[6]) +
                     (chromaCb * constHueColorSpaceMat[7]) +
                     (chromaCr * constHueColorSpaceMat[8]);
        }

        __device__ uint RGBAPACK_10bit(float red, float green, float blue, uint alpha)
        {
            uint ARGBpixel = 0;

            // Clamp final 10 bit results
            red   = ::fmin(::fmax(red,   0.0f), 1023.f);
            green = ::fmin(::fmax(green, 0.0f), 1023.f);
            blue  = ::fmin(::fmax(blue,  0.0f), 1023.f);

            // Convert to 8 bit unsigned integers per color component
            ARGBpixel = (((uint)blue  >> 2) |
                        (((uint)green >> 2) << 8)  |
                        (((uint)red   >> 2) << 16) |
                        (uint)alpha);

            return ARGBpixel;
        }

        // CUDA kernel for outputting the final ARGB pixels from NV12

        #define COLOR_COMPONENT_BIT_SIZE 10
        #define COLOR_COMPONENT_MASK     0x3FF

        __global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch,
                                   uint* dstImage, size_t nDestPitch,
                                   uint width, uint height)
        {
            // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
            const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
            const int y = blockIdx.y *  blockDim.y       +  threadIdx.y;

            if (x >= width || y >= height)
                return;

            // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
            // if we move to texture we could read 4 luminance values

            uint yuv101010Pel[2];

            yuv101010Pel[0] = (srcImage[y * nSourcePitch + x    ]) << 2;
            yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;

            const size_t chromaOffset = nSourcePitch * height;

            const int y_chroma = y >> 1;

            if (y & 1)  // odd scanline ?
            {
                uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
                uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];

                if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
                {
                    chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
                    chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
                }

                yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

                yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
            }
            else
            {
                yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

                yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
            }

            // this step performs the color conversion
            uint yuvi[6];
            float red[2], green[2], blue[2];

            yuvi[0] =  (yuv101010Pel[0] &   COLOR_COMPONENT_MASK    );
            yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
            yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

            yuvi[3] =  (yuv101010Pel[1] &   COLOR_COMPONENT_MASK    );
            yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
            yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

            // YUV to RGB Transformation conversion
            YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
            YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);

            // Clamp the results to RGBA
            const size_t dstImagePitch = nDestPitch >> 2;

            dstImage[y * dstImagePitch + x     ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
            dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
        }

        void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));

            NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
                                                   interopFrame.cols, interopFrame.rows);

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
}}}
|
||||
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
NV12ToARGB color space conversion CUDA kernel
|
||||
|
||||
This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar)
|
||||
source and converts to output in ARGB format
|
||||
*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/gpu/device/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device {
|
||||
namespace video_decoding
|
||||
{
|
||||
__constant__ uint constAlpha = ((uint)0xff << 24);
|
||||
|
||||
__constant__ float constHueColorSpaceMat[9];
|
||||
|
||||
void loadHueCSC(float hueCSC[9])
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
|
||||
}
|
||||
|
||||
__device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
|
||||
{
|
||||
float luma, chromaCb, chromaCr;
|
||||
|
||||
// Prepare for hue adjustment
|
||||
luma = (float)yuvi[0];
|
||||
chromaCb = (float)((int)yuvi[1] - 512.0f);
|
||||
chromaCr = (float)((int)yuvi[2] - 512.0f);
|
||||
|
||||
// Convert YUV To RGB with hue adjustment
|
||||
*red = (luma * constHueColorSpaceMat[0]) +
|
||||
(chromaCb * constHueColorSpaceMat[1]) +
|
||||
(chromaCr * constHueColorSpaceMat[2]);
|
||||
|
||||
*green = (luma * constHueColorSpaceMat[3]) +
|
||||
(chromaCb * constHueColorSpaceMat[4]) +
|
||||
(chromaCr * constHueColorSpaceMat[5]);
|
||||
|
||||
*blue = (luma * constHueColorSpaceMat[6]) +
|
||||
(chromaCb * constHueColorSpaceMat[7]) +
|
||||
(chromaCr * constHueColorSpaceMat[8]);
|
||||
}
|
||||
|
||||
__device__ uint RGBAPACK_10bit(float red, float green, float blue, uint alpha)
|
||||
{
|
||||
uint ARGBpixel = 0;
|
||||
|
||||
// Clamp final 10 bit results
|
||||
red = ::fmin(::fmax(red, 0.0f), 1023.f);
|
||||
green = ::fmin(::fmax(green, 0.0f), 1023.f);
|
||||
blue = ::fmin(::fmax(blue, 0.0f), 1023.f);
|
||||
|
||||
// Convert to 8 bit unsigned integers per color component
|
||||
ARGBpixel = (((uint)blue >> 2) |
|
||||
(((uint)green >> 2) << 8) |
|
||||
(((uint)red >> 2) << 16) |
|
||||
(uint)alpha);
|
||||
|
||||
return ARGBpixel;
|
||||
}
|
||||
        // CUDA kernel for outputting the final ARGB pixels from NV12

        #define COLOR_COMPONENT_BIT_SIZE 10
        #define COLOR_COMPONENT_MASK     0x3FF

        __global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch,
                                   uint* dstImage, size_t nDestPitch,
                                   uint width, uint height)
        {
            // Pad borders with duplicate pixels; multiply x by 2 because we process two pixels per thread
            const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x >= width || y >= height)
                return;

            // Read 2 luma components at a time, so we don't waste processing since CbCr are decimated this way.
            // If we moved to texture reads we could fetch 4 luminance values at once.

            uint yuv101010Pel[2];

            yuv101010Pel[0] = (srcImage[y * nSourcePitch + x    ]) << 2;
            yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;

            const size_t chromaOffset = nSourcePitch * height;

            const int y_chroma = y >> 1;

            if (y & 1)  // odd scanline ?
            {
                uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x    ];
                uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];

                if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
                {
                    chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x    ] + 1) >> 1;
                    chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
                }

                yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

                yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
            }
            else
            {
                yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));

                yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x    ] << ( COLOR_COMPONENT_BIT_SIZE       + 2));
                yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
            }

            // This step performs the color conversion
            uint yuvi[6];
            float red[2], green[2], blue[2];

            yuvi[0] =  (yuv101010Pel[0] &   COLOR_COMPONENT_MASK    );
            yuvi[1] = ((yuv101010Pel[0] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
            yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

            yuvi[3] =  (yuv101010Pel[1] &   COLOR_COMPONENT_MASK    );
            yuvi[4] = ((yuv101010Pel[1] >>  COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
            yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);

            // YUV to RGB transformation
            YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
            YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);

            // Clamp and pack the results into ARGB

            const size_t dstImagePitch = nDestPitch >> 2;

            dstImage[y * dstImagePitch + x    ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
            dstImage[y * dstImagePitch + x + 1] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
        }
        void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));

            NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
                                                   interopFrame.cols, interopFrame.rows);

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
}}}

#endif /* CUDA_DISABLER */
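As a rough sketch of how this wrapper might be driven from the host, assuming the OpenCV 2.4-era GpuMat/PtrStep types (the buffer setup below is illustrative; the real caller sits in the GPU video-decoding code, which this diff does not show):

// Hypothetical caller (not part of this diff). An NV12 frame is a full-height
// luma plane followed by a half-height interleaved CbCr plane, hence rows * 3 / 2.
cv::gpu::GpuMat nv12(height * 3 / 2, width, CV_8UC1);   // filled by the decoder
cv::gpu::GpuMat argb(height, width, CV_32SC1);          // one packed ARGB uint per pixel

cv::gpu::device::NV12ToARGB_gpu(
    cv::gpu::PtrStepb(nv12.data, nv12.step),
    cv::gpu::PtrStepSz<unsigned int>(argb.rows, argb.cols, (unsigned int*)argb.data, argb.step),
    0 /* default stream: the wrapper synchronizes before returning */);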
@@ -1,472 +1,472 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace bf_radius_match
    {
        ///////////////////////////////////////////////////////////////////////////////
        // Match Unrolled

        template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
        __global__ void matchUnrolled(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
            PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
        {
        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)

            extern __shared__ int smem[];

            const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
            const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;

            typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            Dist dist;

            #pragma unroll
            for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
            {
                const int loadX = threadIdx.x + i * BLOCK_SIZE;

                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;

                if (loadX < query.cols)
                {
                    T val;

                    ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;

                    ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                }

                __syncthreads();

                #pragma unroll
                for (int j = 0; j < BLOCK_SIZE; ++j)
                    dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);

                __syncthreads();
            }

            float distVal = (typename Dist::result_type)dist;

            if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance)
            {
                unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1);
                if (ind < maxCount)
                {
                    bestTrainIdx.ptr(queryIdx)[ind] = trainIdx;
                    if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx;
                    bestDistance.ptr(queryIdx)[ind] = distVal;
                }
            }

        #endif
        }
        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
        void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
            const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
                trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
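The dynamic shared-memory request above covers the two descriptor tiles staged per block (one from query, one from train). A quick back-of-the-envelope check, under the BLOCK_SIZE of 16 the dispatcher below uses (the snippet is illustrative, not OpenCV code):

// Two BLOCK_SIZE x BLOCK_SIZE tiles; Dist::value_type is at most int-sized here,
// so sizing the buffer in ints is a safe upper bound.
const int    BLOCK_SIZE = 16;
const size_t smemSize   = 2 * BLOCK_SIZE * BLOCK_SIZE * sizeof(int); // 2 * 256 * 4 = 2048 bytes
// 2 KB comfortably fits the 16 KB of shared memory on the sm_11 parts these kernels target.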
        template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
        void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            for (int i = 0; i < n; ++i)
            {
                const PtrStepSz<T> train = trains[i];

                const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

                if (masks != 0 && masks[i].data)
                {
                    matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                else
                {
                    matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                cudaSafeCall( cudaGetLastError() );
            }

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
        ///////////////////////////////////////////////////////////////////////////////
        // Match

        template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
        __global__ void match(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
            PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
        {
        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)

            extern __shared__ int smem[];

            const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
            const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;

            typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
            typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);

            Dist dist;

            for (int i = 0, endi = (query.cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endi; ++i)
            {
                const int loadX = threadIdx.x + i * BLOCK_SIZE;

                s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
                s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;

                if (loadX < query.cols)
                {
                    T val;

                    ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
                    s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;

                    ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
                    s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
                }

                __syncthreads();

                #pragma unroll
                for (int j = 0; j < BLOCK_SIZE; ++j)
                    dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);

                __syncthreads();
            }

            float distVal = (typename Dist::result_type)dist;

            if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance)
            {
                unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1);
                if (ind < maxCount)
                {
                    bestTrainIdx.ptr(queryIdx)[ind] = trainIdx;
                    if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx;
                    bestDistance.ptr(queryIdx)[ind] = distVal;
                }
            }

        #endif
        }
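In both kernels the per-query counter is bumped with atomicInc even after the output row is full, so nMatches can come back larger than the row capacity. A host-side reader would presumably clamp it along these lines (illustrative only, not OpenCV code):

#include <algorithm>

// For query q, only the first trainIdxCols entries of trainIdx/distance are
// valid, even if the kernel counted more hits than that.
inline int validMatches(unsigned int nMatchesForQuery, int trainIdxCols)
{
    return std::min((int)nMatchesForQuery, trainIdxCols);
}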
        template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
        void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
            const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
                trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <int BLOCK_SIZE, typename Dist, typename T>
        void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            cudaStream_t stream)
        {
            const dim3 block(BLOCK_SIZE, BLOCK_SIZE);

            const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

            for (int i = 0; i < n; ++i)
            {
                const PtrStepSz<T> train = trains[i];

                const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));

                if (masks != 0 && masks[i].data)
                {
                    match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                else
                {
                    match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                        trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
                }
                cudaSafeCall( cudaGetLastError() );
            }

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        ///////////////////////////////////////////////////////////////////////////////
        // Match dispatcher

        template <typename Dist, typename T, typename Mask>
        void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            (void)cc;
            if (query.cols <= 64)
            {
                matchUnrolled<16, 64, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 128)
            {
                matchUnrolled<16, 128, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            /*else if (query.cols <= 256)
            {
                matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }*/
            else
            {
                match<16, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
            }
        }
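Which path the thresholds above select is easiest to see with concrete descriptor widths; a small illustrative check (not OpenCV code), assuming the BLOCK_SIZE of 16 used in the calls above:

// Tiles per descriptor on the fully unrolled paths:
//   query.cols <= 64  (e.g. SURF-64)  -> matchUnrolled<16, 64,  Dist>, 64 / 16 = 4 tiles
//   query.cols <= 128 (e.g. SURF-128) -> matchUnrolled<16, 128, Dist>, 128 / 16 = 8 tiles
// Anything wider falls back to the runtime-bounded loop in match<16, Dist>.
static_assert( 64 / 16 == 4, "SURF-64: four unrolled BLOCK_SIZE-wide tiles");
static_assert(128 / 16 == 8, "SURF-128: eight unrolled tiles");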
        template <typename Dist, typename T>
        void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            (void)cc;
            if (query.cols <= 64)
            {
                matchUnrolled<16, 64, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 128)
            {
                matchUnrolled<16, 128, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            /*else if (query.cols <= 256)
            {
                matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 512)
            {
                matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
            else if (query.cols <= 1024)
            {
                matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }*/
            else
            {
                match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
            }
        }

        ///////////////////////////////////////////////////////////////////////////////
        // Radius Match caller

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
            else
            {
                matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }

        template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<int   >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
            else
            {
                matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }

        //template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<int   >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
            const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            if (mask.data)
            {
                matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
            else
            {
                matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
                    trainIdx, distance, nMatches,
                    cc, stream);
            }
        }

        template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

        template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

        //template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchL2_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);

        template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
            const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
            int cc, cudaStream_t stream)
        {
            matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
                trainIdx, imgIdx, distance, nMatches,
                cc, stream);
        }

        template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
    } // namespace bf_radius_match
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
|
||||
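
The explicit instantiations above are the entire export surface of this translation unit: matchL1_gpu, matchL2_gpu and matchHamming_gpu are compiled only for the depths listed, and unsupported depths are left as commented-out lines so the full depth grid stays visible. The host side then selects the right instantiation from a runtime table. A minimal self-contained sketch of that pattern follows; all names in it are illustrative, not OpenCV's API:

// dispatch_sketch.cpp - illustrative only; names are not OpenCV's.
#include <cstdio>

// Stand-in for a kernel-launching function template that lives in a .cu file.
template <typename T>
void match_gpu(const void* query, int n)
{
    (void)query;
    std::printf("match_gpu instantiated for %zu-byte elements, n = %d\n", sizeof(T), n);
}

// Explicit instantiations: only these depths exist in the binary,
// mirroring the "template void matchL1_gpu<...>" lines above.
template void match_gpu<unsigned short>(const void*, int);
template void match_gpu<short>(const void*, int);
template void match_gpu<int>(const void*, int);
template void match_gpu<float>(const void*, int);

// Host-side runtime dispatch over the instantiated set.
enum Depth { DEPTH_16U, DEPTH_16S, DEPTH_32S, DEPTH_32F };

void match(Depth depth, const void* query, int n)
{
    typedef void (*func_t)(const void*, int);
    static const func_t funcs[] =
    {
        match_gpu<unsigned short>, match_gpu<short>, match_gpu<int>, match_gpu<float>
    };
    funcs[depth](query, n);
}

int main()
{
    match(DEPTH_32F, 0, 128); // calls the float instantiation
    return 0;
}

Keeping one entry point per depth, rather than exposing the template itself, lets the CUDA code stay inside the .cu file while host code links against plain functions.
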
@@ -1,201 +1,201 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"

#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

using namespace cv::gpu;

typedef unsigned char uchar;
typedef unsigned short ushort;

//////////////////////////////////////////////////////////////////////////////////
/// Bilateral filtering

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        __device__ __forceinline__ float norm_l1(const float& a)  { return ::fabs(a); }
        __device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); }
        __device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); }
        __device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); }

        __device__ __forceinline__ float sqr(const float& a) { return a * a; }

        template<typename T, typename B>
        __global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;

            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if (x >= src.cols || y >= src.rows)
                return;

            value_type center = saturate_cast<value_type>(src(y, x));

            value_type sum1 = VecTraits<value_type>::all(0);
            float sum2 = 0;

            int r = ksz / 2;
            float r2 = (float)(r * r);

            int tx = x - r + ksz;
            int ty = y - r + ksz;

            if (x - ksz/2 >= 0 && y - ksz/2 >= 0 && tx < src.cols && ty < src.rows)
            {
                for (int cy = y - r; cy < ty; ++cy)
                    for (int cx = x - r; cx < tx; ++cx)
                    {
                        float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
                        if (space2 > r2)
                            continue;

                        value_type value = saturate_cast<value_type>(src(cy, cx));

                        float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
                        sum1 = sum1 + weight * value;
                        sum2 = sum2 + weight;
                    }
            }
            else
            {
                for (int cy = y - r; cy < ty; ++cy)
                    for (int cx = x - r; cx < tx; ++cx)
                    {
                        float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
                        if (space2 > r2)
                            continue;

                        value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));

                        float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);

                        sum1 = sum1 + weight * value;
                        sum2 = sum2 + weight;
                    }
            }
            dst(y, x) = saturate_cast<T>(sum1 / sum2);
        }

        template<typename T, template <typename> class B>
        void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
        {
            dim3 block (32, 8);
            dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));

            B<T> b(src.rows, src.cols);

            float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
            float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);

            cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
            bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
            cudaSafeCall ( cudaGetLastError () );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template<typename T>
        void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
        {
            typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);

            static caller_t funcs[] =
            {
                bilateral_caller<T, BrdReflect101>,
                bilateral_caller<T, BrdReplicate>,
                bilateral_caller<T, BrdConstant>,
                bilateral_caller<T, BrdReflect>,
                bilateral_caller<T, BrdWrap>,
            };
            funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
        }
    }
}}}


#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
    template void cv::gpu::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);

OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)

//OCV_INSTANTIATE_BILATERAL_FILTER(schar)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar4)

OCV_INSTANTIATE_BILATERAL_FILTER(short)
//OCV_INSTANTIATE_BILATERAL_FILTER(short2)
OCV_INSTANTIATE_BILATERAL_FILTER(short3)
OCV_INSTANTIATE_BILATERAL_FILTER(short4)

OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)

//OCV_INSTANTIATE_BILATERAL_FILTER(int)
//OCV_INSTANTIATE_BILATERAL_FILTER(int2)
//OCV_INSTANTIATE_BILATERAL_FILTER(int3)
//OCV_INSTANTIATE_BILATERAL_FILTER(int4)

OCV_INSTANTIATE_BILATERAL_FILTER(float)
//OCV_INSTANTIATE_BILATERAL_FILTER(float2)
OCV_INSTANTIATE_BILATERAL_FILTER(float3)
OCV_INSTANTIATE_BILATERAL_FILTER(float4)


#endif /* CUDA_DISABLER */
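
Above, bilateral_caller folds the two Gaussian denominators into sigma_spatial2_inv_half and sigma_color2_inv_half once on the host, so bilateral_kernel pays a single ::exp per neighbour: weight = exp(space2 * s + sqr(norm_l1(value - center)) * c) with s = -0.5/sigma_spatial^2 and c = -0.5/sigma_color^2. A single-channel CPU reference of the same arithmetic can help validate the kernel; the sketch below is illustrative only and simplifies border handling to skipping out-of-range neighbours rather than using the Brd* functors:

// bilateral_reference.cpp - CPU reference for the single-channel weight
// computation in bilateral_kernel above (a sketch for checking the math,
// not OpenCV code).
#include <cmath>
#include <cstdio>
#include <vector>

void bilateral_ref(const std::vector<float>& src, std::vector<float>& dst,
                   int rows, int cols, int ksz, float sigma_spatial, float sigma_color)
{
    // Same precomputation as bilateral_caller: fold -0.5/sigma^2 into one
    // factor so the inner loop evaluates a single exp() per neighbour.
    const float ss = -0.5f / (sigma_spatial * sigma_spatial);
    const float sc = -0.5f / (sigma_color * sigma_color);
    const int r = ksz / 2;
    const float r2 = float(r * r);

    for (int y = 0; y < rows; ++y)
        for (int x = 0; x < cols; ++x)
        {
            float center = src[y * cols + x];
            float sum1 = 0.f, sum2 = 0.f;
            for (int cy = y - r; cy < y - r + ksz; ++cy)
                for (int cx = x - r; cx < x - r + ksz; ++cx)
                {
                    if (cy < 0 || cy >= rows || cx < 0 || cx >= cols)
                        continue; // simplified border: skip, not a Brd* functor
                    float space2 = float((x - cx) * (x - cx) + (y - cy) * (y - cy));
                    if (space2 > r2)
                        continue; // round support, as in the kernel
                    float value = src[cy * cols + cx];
                    float diff  = value - center; // norm_l1 for one channel
                    float w = std::exp(space2 * ss + diff * diff * sc);
                    sum1 += w * value;
                    sum2 += w;
                }
            // sum2 >= 1 because the centre pixel always contributes weight 1.
            dst[y * cols + x] = sum1 / sum2;
        }
}

int main()
{
    std::vector<float> src(25, 0.f), dst(25, 0.f);
    src[12] = 1.f; // impulse in the middle of a 5x5 image
    bilateral_ref(src, dst, 5, 5, 5, 2.f, 0.1f);
    std::printf("center after filtering: %f\n", dst[12]);
    return 0;
}
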
@@ -1,121 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace blend
    {
        template <typename T>
        __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
                                          const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
        {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y < rows && x < cols)
            {
                int x_ = x / cn;
                float w1 = weights1.ptr(y)[x_];
                float w2 = weights2.ptr(y)[x_];
                T p1 = img1.ptr(y)[x];
                T p2 = img2.ptr(y)[x];
                result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
            }
        }

        template <typename T>
        void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

            blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }

        template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
        template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);


        __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
                                              const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
        {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y < rows && x < cols)
            {
                float w1 = weights1.ptr(y)[x];
                float w2 = weights2.ptr(y)[x];
                float sum_inv = 1.f / (w1 + w2 + 1e-5f);
                w1 *= sum_inv;
                w2 *= sum_inv;
                uchar4 p1 = ((const uchar4*)img1.ptr(y))[x];
                uchar4 p2 = ((const uchar4*)img2.ptr(y))[x];
                ((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
                                                          p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
            }
        }

        void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

            blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace blend
}}} // namespace cv { namespace gpu { namespace device


#endif /* CUDA_DISABLER */
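
Both kernels above evaluate result = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f): the 1e-5f epsilon keeps pixels where both weight maps are zero at roughly 0 instead of NaN, and the 8UC4 path hoists the division out by normalising w1 and w2 once per pixel before blending all four channels. A scalar reference of the same formula, illustrative only:

// blend_reference.cpp - scalar reference for blendLinearKernel's formula.
#include <cstdio>

// Same epsilon the kernels use so zero-weight pixels stay finite.
inline float blend_linear(float p1, float w1, float p2, float w2)
{
    return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}

int main()
{
    // Equal weights average the inputs; zero weights yield ~0 instead of NaN.
    std::printf("%f\n", blend_linear(10.f, 0.5f, 20.f, 0.5f)); // ~15
    std::printf("%f\n", blend_linear(10.f, 0.f,  20.f, 0.f));  // 0
    return 0;
}
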
File diff suppressed because it is too large
@@ -1,384 +1,384 @@
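The file below exports every colour conversion through the OPENCV_GPU_IMPLEMENT_CVTCOLOR macro family defined partway down: a *_traits class supplies a device functor via create_functor(), and the macro stamps out one plain, non-template entry point per (conversion, depth) pair that forwards to cv::gpu::device::transform. A self-contained miniature of that traits-plus-macro pattern, with illustrative names that are not OpenCV's:

// traits_macro_sketch.cpp - illustrative miniature; names are not OpenCV's.
#include <cstdio>

// Stand-in for a conversion-functor traits class such as bgr_to_rgb_traits<T>.
template <typename T> struct swap_rb_traits
{
    struct functor_type
    {
        typedef T argument_type;
        typedef T result_type;
        T operator()(T v) const { return v; } // the real functor swizzles channels
    };
    static functor_type create_functor() { return functor_type(); }
};

// Miniature of OPENCV_GPU_IMPLEMENT_CVTCOLOR: one named, non-template
// entry point per (conversion, depth) pair.
#define IMPLEMENT_CVT(name, traits)                            \
    void name(traits::functor_type::argument_type v)           \
    {                                                          \
        traits::functor_type f = traits::create_functor();     \
        std::printf(#name " -> %g\n", (double)f(v));           \
    }

IMPLEMENT_CVT(swap_rb_8u, swap_rb_traits<unsigned char>)
IMPLEMENT_CVT(swap_rb_32f, swap_rb_traits<float>)

int main()
{
    swap_rb_8u(255);
    swap_rb_32f(0.5f);
    return 0;
}

Generating named functions this way keeps each conversion individually linkable and lets per-functor launch traits (the OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS specialisations below) tune block shape per conversion.
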
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include <internal_shared.hpp>
|
||||
#include <opencv2/gpu/device/transform.hpp>
|
||||
#include <opencv2/gpu/device/color.hpp>
|
||||
#include <cvt_colot_internal.h>
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_x = 8 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
|
||||
{
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
|
||||
{
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
|
||||
{
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
|
||||
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) \
|
||||
{ \
|
||||
traits::functor_type functor = traits::create_functor(); \
|
||||
typedef typename traits::functor_type::argument_type src_t; \
|
||||
typedef typename traits::functor_type::result_type dst_t; \
|
||||
cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
|
||||
}
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits)
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits<ushort>) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits<uchar>) \
|
||||
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits<float>)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgra)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls4)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls4)

    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgb)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgba)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgra)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgr)
    OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgra)

    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
@@ -1,388 +1,388 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace column_filter
    {
        #define MAX_KERNEL_SIZE 32

        __constant__ float c_kernel[MAX_KERNEL_SIZE];

        void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
        {
            if (stream == 0)
                cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
            else
                cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
        }
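
        // linearColumnFilter: vertical pass of a separable convolution. Each
        // block filters a BLOCK_DIM_X-wide strip of columns and produces
        // BLOCK_DIM_Y * PATCH_PER_BLOCK output rows. The strip, plus
        // HALO_SIZE * BLOCK_DIM_Y halo rows above and below it, is staged in
        // shared memory; only blocks touching the image border pay for border
        // interpolation (brd), interior blocks read src directly. The taps come
        // from c_kernel, filled by loadKernel() above with a device-to-device
        // copy into constant memory (asynchronously when a stream is given).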

        template <int KSIZE, typename T, typename D, typename B>
        __global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
        {
        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            const int BLOCK_DIM_X = 16;
            const int BLOCK_DIM_Y = 16;
            const int PATCH_PER_BLOCK = 4;
            const int HALO_SIZE = KSIZE <= 16 ? 1 : 2;
        #else
            const int BLOCK_DIM_X = 16;
            const int BLOCK_DIM_Y = 8;
            const int PATCH_PER_BLOCK = 2;
            const int HALO_SIZE = 2;
        #endif
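
            // Fermi (sm_20) and newer: 16x16 threads, 4 rows per thread, and a
            // single halo block when the filter fits in 16 taps. Older devices
            // get smaller 16x8 blocks, 2 rows per thread, and a fixed halo of 2.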

            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

            __shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];

            const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;

            if (x >= src.cols)
                return;

            const T* src_col = src.ptr() + x;

            const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;

            if (blockIdx.y > 0)
            {
                //Upper halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x));
            }
            else
            {
                //Upper halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
            }

            if (blockIdx.y + 2 < gridDim.y)
            {
                //Main data
                #pragma unroll
                for (int j = 0; j < PATCH_PER_BLOCK; ++j)
                    smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x));

                //Lower halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x));
            }
            else
            {
                //Main data
                #pragma unroll
                for (int j = 0; j < PATCH_PER_BLOCK; ++j)
                    smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));

                //Lower halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
            }

            __syncthreads();
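
            // Convolve: each thread accumulates the KSIZE taps for each of its
            // PATCH_PER_BLOCK output rows; 'anchor' shifts the window so the
            // kernel origin lines up with the output row.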

            #pragma unroll
            for (int j = 0; j < PATCH_PER_BLOCK; ++j)
            {
                const int y = yStart + j * BLOCK_DIM_Y;

                if (y < src.rows)
                {
                    sum_t sum = VecTraits<sum_t>::all(0);

                    #pragma unroll
                    for (int k = 0; k < KSIZE; ++k)
                        sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];

                    dst(y, x) = saturate_cast<D>(sum);
                }
            }
        }
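
        // Host-side launcher: replicates the kernel's compile-time launch
        // constants for the device's compute capability (cc) and rounds the
        // grid up so every output pixel is covered.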

        template <int KSIZE, typename T, typename D, template<typename> class B>
        void linearColumnFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
        {
            int BLOCK_DIM_X;
            int BLOCK_DIM_Y;
            int PATCH_PER_BLOCK;

            if (cc >= 20)
            {
                BLOCK_DIM_X = 16;
                BLOCK_DIM_Y = 16;
                PATCH_PER_BLOCK = 4;
            }
            else
            {
                BLOCK_DIM_X = 16;
                BLOCK_DIM_Y = 8;
                PATCH_PER_BLOCK = 2;
            }

            const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
            const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));

            B<T> brd(src.rows);

            linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
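
        // Dispatch table below: callers[border mode][kernel size]. The kernel
        // size is a template parameter so the tap loop unrolls completely;
        // entry 0 of each row is unused (ksize >= 1).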

        template <typename T, typename D>
        void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);

            static const caller_t callers[5][33] =
            {
                {
                    0,
                    linearColumnFilter_caller< 1, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 2, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 3, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 4, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 5, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 6, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 7, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 8, T, D, BrdColReflect101>,
                    linearColumnFilter_caller< 9, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<10, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<11, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<12, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<13, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<14, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<15, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<16, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<17, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<18, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<19, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<20, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<21, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<22, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<23, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<24, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<25, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<26, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<27, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<28, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<29, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<30, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<31, T, D, BrdColReflect101>,
                    linearColumnFilter_caller<32, T, D, BrdColReflect101>
                },
                {
                    0,
                    linearColumnFilter_caller< 1, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 2, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 3, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 4, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 5, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 6, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 7, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 8, T, D, BrdColReplicate>,
                    linearColumnFilter_caller< 9, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<10, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<11, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<12, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<13, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<14, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<15, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<16, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<17, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<18, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<19, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<20, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<21, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<22, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<23, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<24, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<25, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<26, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<27, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<28, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<29, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<30, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<31, T, D, BrdColReplicate>,
                    linearColumnFilter_caller<32, T, D, BrdColReplicate>
                },
                {
                    0,
                    linearColumnFilter_caller< 1, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 2, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 3, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 4, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 5, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 6, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 7, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 8, T, D, BrdColConstant>,
                    linearColumnFilter_caller< 9, T, D, BrdColConstant>,
                    linearColumnFilter_caller<10, T, D, BrdColConstant>,
                    linearColumnFilter_caller<11, T, D, BrdColConstant>,
                    linearColumnFilter_caller<12, T, D, BrdColConstant>,
                    linearColumnFilter_caller<13, T, D, BrdColConstant>,
                    linearColumnFilter_caller<14, T, D, BrdColConstant>,
                    linearColumnFilter_caller<15, T, D, BrdColConstant>,
                    linearColumnFilter_caller<16, T, D, BrdColConstant>,
                    linearColumnFilter_caller<17, T, D, BrdColConstant>,
                    linearColumnFilter_caller<18, T, D, BrdColConstant>,
                    linearColumnFilter_caller<19, T, D, BrdColConstant>,
                    linearColumnFilter_caller<20, T, D, BrdColConstant>,
                    linearColumnFilter_caller<21, T, D, BrdColConstant>,
                    linearColumnFilter_caller<22, T, D, BrdColConstant>,
                    linearColumnFilter_caller<23, T, D, BrdColConstant>,
                    linearColumnFilter_caller<24, T, D, BrdColConstant>,
                    linearColumnFilter_caller<25, T, D, BrdColConstant>,
                    linearColumnFilter_caller<26, T, D, BrdColConstant>,
                    linearColumnFilter_caller<27, T, D, BrdColConstant>,
                    linearColumnFilter_caller<28, T, D, BrdColConstant>,
                    linearColumnFilter_caller<29, T, D, BrdColConstant>,
                    linearColumnFilter_caller<30, T, D, BrdColConstant>,
                    linearColumnFilter_caller<31, T, D, BrdColConstant>,
                    linearColumnFilter_caller<32, T, D, BrdColConstant>
                },
                {
                    0,
                    linearColumnFilter_caller< 1, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 2, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 3, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 4, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 5, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 6, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 7, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 8, T, D, BrdColReflect>,
                    linearColumnFilter_caller< 9, T, D, BrdColReflect>,
                    linearColumnFilter_caller<10, T, D, BrdColReflect>,
                    linearColumnFilter_caller<11, T, D, BrdColReflect>,
                    linearColumnFilter_caller<12, T, D, BrdColReflect>,
                    linearColumnFilter_caller<13, T, D, BrdColReflect>,
                    linearColumnFilter_caller<14, T, D, BrdColReflect>,
                    linearColumnFilter_caller<15, T, D, BrdColReflect>,
                    linearColumnFilter_caller<16, T, D, BrdColReflect>,
                    linearColumnFilter_caller<17, T, D, BrdColReflect>,
                    linearColumnFilter_caller<18, T, D, BrdColReflect>,
                    linearColumnFilter_caller<19, T, D, BrdColReflect>,
                    linearColumnFilter_caller<20, T, D, BrdColReflect>,
                    linearColumnFilter_caller<21, T, D, BrdColReflect>,
                    linearColumnFilter_caller<22, T, D, BrdColReflect>,
                    linearColumnFilter_caller<23, T, D, BrdColReflect>,
                    linearColumnFilter_caller<24, T, D, BrdColReflect>,
                    linearColumnFilter_caller<25, T, D, BrdColReflect>,
                    linearColumnFilter_caller<26, T, D, BrdColReflect>,
                    linearColumnFilter_caller<27, T, D, BrdColReflect>,
                    linearColumnFilter_caller<28, T, D, BrdColReflect>,
                    linearColumnFilter_caller<29, T, D, BrdColReflect>,
                    linearColumnFilter_caller<30, T, D, BrdColReflect>,
                    linearColumnFilter_caller<31, T, D, BrdColReflect>,
                    linearColumnFilter_caller<32, T, D, BrdColReflect>
                },
                {
                    0,
                    linearColumnFilter_caller< 1, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 2, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 3, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 4, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 5, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 6, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 7, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 8, T, D, BrdColWrap>,
                    linearColumnFilter_caller< 9, T, D, BrdColWrap>,
                    linearColumnFilter_caller<10, T, D, BrdColWrap>,
                    linearColumnFilter_caller<11, T, D, BrdColWrap>,
                    linearColumnFilter_caller<12, T, D, BrdColWrap>,
                    linearColumnFilter_caller<13, T, D, BrdColWrap>,
                    linearColumnFilter_caller<14, T, D, BrdColWrap>,
                    linearColumnFilter_caller<15, T, D, BrdColWrap>,
                    linearColumnFilter_caller<16, T, D, BrdColWrap>,
                    linearColumnFilter_caller<17, T, D, BrdColWrap>,
                    linearColumnFilter_caller<18, T, D, BrdColWrap>,
                    linearColumnFilter_caller<19, T, D, BrdColWrap>,
                    linearColumnFilter_caller<20, T, D, BrdColWrap>,
                    linearColumnFilter_caller<21, T, D, BrdColWrap>,
                    linearColumnFilter_caller<22, T, D, BrdColWrap>,
                    linearColumnFilter_caller<23, T, D, BrdColWrap>,
                    linearColumnFilter_caller<24, T, D, BrdColWrap>,
                    linearColumnFilter_caller<25, T, D, BrdColWrap>,
                    linearColumnFilter_caller<26, T, D, BrdColWrap>,
                    linearColumnFilter_caller<27, T, D, BrdColWrap>,
                    linearColumnFilter_caller<28, T, D, BrdColWrap>,
                    linearColumnFilter_caller<29, T, D, BrdColWrap>,
                    linearColumnFilter_caller<30, T, D, BrdColWrap>,
                    linearColumnFilter_caller<31, T, D, BrdColWrap>,
                    linearColumnFilter_caller<32, T, D, BrdColWrap>
                }
            };

            loadKernel(kernel, ksize, stream);

            callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
        }
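
        // Explicit instantiations: only the type pairs used by the GPU filter
        // engine. The source is always a float vector type, matching the
        // intermediate image produced by the row-filter pass.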

        template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearColumnFilter_gpu<float , int   >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
    } // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device


#endif /* CUDA_DISABLER */
@@ -1,131 +1,131 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
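        // copyMakeBorder: dst is the padded image. Shifting by (top, left)
        // maps each destination pixel back into source coordinates; the
        // BorderReader wrapped around src resolves out-of-range reads, so the
        // kernel body is a plain per-pixel copy.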
        template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz<T> dst, int top, int left)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
                dst.ptr(y)[x] = src(y - top, x - left);
        }
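
        // Launch helper: constructs the border object (the constant value is
        // only meaningful for BrdConstant) and runs the kernel with 32x8
        // thread blocks sized to the destination image.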

        template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
        {
            static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, int top, int left,
                const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<T> brd(src.rows, src.cols, VecTraits<T>::make(borderValue));
                BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);

                copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
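
        // Entry point: borderMode indexes the table below (reflect101,
        // replicate, constant, reflect, wrap) and the byte-typed src/dst are
        // reinterpreted as the element type implied by T and cn.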

        template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode,
            const T* borderValue, cudaStream_t stream)
        {
            typedef typename TypeVec<T, cn>::vec_type vec_type;

            typedef void (*caller_t)(const PtrStepSz<vec_type>& src, const PtrStepSz<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);

            static const caller_t callers[5] =
            {
                CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
                CopyMakeBorderDispatcher<BrdReplicate,  vec_type>::call,
                CopyMakeBorderDispatcher<BrdConstant,   vec_type>::call,
                CopyMakeBorderDispatcher<BrdReflect,    vec_type>::call,
                CopyMakeBorderDispatcher<BrdWrap,       vec_type>::call
            };

            callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
        }

        template void copyMakeBorder_gpu<uchar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<uchar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<uchar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<uchar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);

        //template void copyMakeBorder_gpu<schar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<schar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<schar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<schar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);

        template void copyMakeBorder_gpu<ushort, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<ushort, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<ushort, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<ushort, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);

        template void copyMakeBorder_gpu<short, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<short, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<short, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<short, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);

        //template void copyMakeBorder_gpu<int, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<int, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<int, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<int, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);

        template void copyMakeBorder_gpu<float, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<float, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
|
||||
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz<T> dst, int top, int left)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
                dst.ptr(y)[x] = src(y - top, x - left);
        }

        template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
        {
            static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, int top, int left,
                const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<T> brd(src.rows, src.cols, VecTraits<T>::make(borderValue));
                BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);

                copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode,
            const T* borderValue, cudaStream_t stream)
        {
            typedef typename TypeVec<T, cn>::vec_type vec_type;

            typedef void (*caller_t)(const PtrStepSz<vec_type>& src, const PtrStepSz<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);

            static const caller_t callers[5] =
            {
                CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
                CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
                CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
                CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
                CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
            };

            callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
        }

        template void copyMakeBorder_gpu<uchar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<uchar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<uchar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<uchar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);

        //template void copyMakeBorder_gpu<schar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<schar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<schar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<schar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);

        template void copyMakeBorder_gpu<ushort, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<ushort, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<ushort, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<ushort, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);

        template void copyMakeBorder_gpu<short, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<short, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<short, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<short, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);

        //template void copyMakeBorder_gpu<int, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<int, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<int, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<int, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);

        template void copyMakeBorder_gpu<float, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        //template void copyMakeBorder_gpu<float, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
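The copyMakeBorder kernel above never clamps its own reads: every destination pixel fetches src(y - top, x - left), and the BorderReader / border-policy pair resolves any out-of-range coordinate. As a hedged illustration of what the BrdReflect101 policy computes, here is a minimal single-channel CPU reference; idx_reflect101 and copy_make_border_ref are invented names for this sketch, not part of this commit, and it assumes the border is narrower than the source image. A GPU result for BORDER_REFLECT101_GPU should match this reference.

    #include <vector>

    // Reflect-101 index mapping: out-of-range indices mirror about the edge
    // without repeating the edge pixel itself (assumes |border| < n).
    static int idx_reflect101(int i, int n)
    {
        if (i < 0)  i = -i;               // -1 -> 1, -2 -> 2, ...
        if (i >= n) i = 2 * (n - 1) - i;  // n -> n-2, n+1 -> n-3, ...
        return i;
    }

    // CPU reference for the single-channel reflect-101 case: every output
    // pixel reads src(y - top, x - left) through the border mapping.
    static void copy_make_border_ref(const std::vector<unsigned char>& src, int rows, int cols,
                                     std::vector<unsigned char>& dst, int top, int left,
                                     int drows, int dcols)
    {
        dst.assign(static_cast<size_t>(drows) * dcols, 0);
        for (int y = 0; y < drows; ++y)
            for (int x = 0; x < dcols; ++x)
                dst[static_cast<size_t>(y) * dcols + x] =
                    src[static_cast<size_t>(idx_reflect101(y - top, rows)) * cols
                        + idx_reflect101(x - left, cols)];
    }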
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@@ -1,151 +1,151 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/

#if !defined CUDA_DISABLER

#include <thrust/sort.h>

#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace gfft
    {
        texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);

        __device__ uint g_counter = 0;

        template <class Mask> __global__ void findCorners(float threshold, const Mask mask, float2* corners, uint max_count, int rows, int cols)
        {
        #if __CUDA_ARCH__ >= 110

            const int j = blockIdx.x * blockDim.x + threadIdx.x;
            const int i = blockIdx.y * blockDim.y + threadIdx.y;

            if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask(i, j))
            {
                float val = tex2D(eigTex, j, i);

                if (val > threshold)
                {
                    float maxVal = val;

                    maxVal = ::fmax(tex2D(eigTex, j - 1, i - 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j    , i - 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j + 1, i - 1), maxVal);

                    maxVal = ::fmax(tex2D(eigTex, j - 1, i), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j + 1, i), maxVal);

                    maxVal = ::fmax(tex2D(eigTex, j - 1, i + 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j    , i + 1), maxVal);
                    maxVal = ::fmax(tex2D(eigTex, j + 1, i + 1), maxVal);

                    if (val == maxVal)
                    {
                        const uint ind = atomicInc(&g_counter, (uint)(-1));

                        if (ind < max_count)
                            corners[ind] = make_float2(j, i);
                    }
                }
            }

        #endif // __CUDA_ARCH__ >= 110
        }

        int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
        {
            void* counter_ptr;
            cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );

            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(uint)) );

            bindTexture(&eigTex, eig);

            dim3 block(16, 16);
            dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));

            if (mask.data)
                findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
            else
                findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);

            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );

            uint count;
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(uint), cudaMemcpyDeviceToHost) );

            return min(count, max_count);
        }

        class EigGreater
        {
        public:
            __device__ __forceinline__ bool operator()(float2 a, float2 b) const
            {
                return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
            }
        };

        void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
        {
            bindTexture(&eigTex, eig);

            thrust::device_ptr<float2> ptr(corners);

            thrust::sort(ptr, ptr + count, EigGreater());
        }
    } // namespace gfft
}}}

#endif /* CUDA_DISABLER */
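findCorners above compacts a sparse, data-dependent set of results with a single global counter: atomicInc returns the counter's previous value, which serves as each passing thread's unique write slot, and anything at or beyond max_count is dropped. The CUDA sketch below isolates that compaction idiom outside OpenCV; filter_gt and d_demo_counter are invented names, an illustration only.

    #include <cuda_runtime.h>

    __device__ unsigned int d_demo_counter = 0;

    // Copy every element greater than `threshold` into a compacted output,
    // using the same atomicInc slot-assignment pattern as findCorners.
    __global__ void filter_gt(const float* in, int n, float threshold,
                              float* out, unsigned int max_count)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n && in[i] > threshold)
        {
            // atomicInc returns the old value; with a wrap limit of UINT_MAX
            // it behaves as a plain fetch-and-increment.
            unsigned int slot = atomicInc(&d_demo_counter, (unsigned int)(-1));
            if (slot < max_count)
                out[slot] = in[i];
        }
    }

After the kernel, the host reads the counter back (cudaMemcpyFromSymbol) and, as findCorners_gpu does, reports min(counter, max_count), since writes past the cap were discarded.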
@@ -1,224 +1,224 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

namespace cv { namespace gpu { namespace device
{
    #define UINT_BITS 32U

    //Warps == subhistograms per threadblock
    #define WARP_COUNT 6

    //Threadblock size
    #define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
    #define HISTOGRAM256_BIN_COUNT 256

    //Shared memory per threadblock
    #define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)

    #define PARTIAL_HISTOGRAM256_COUNT 240

    #define MERGE_THREADBLOCK_SIZE 256

    #define USE_SMEM_ATOMICS (defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120))

    namespace hist
    {
        #if (!USE_SMEM_ATOMICS)

            #define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )

            __forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
            {
                uint count;
                do
                {
                    count = s_WarpHist[data] & TAG_MASK;
                    count = threadTag | (count + 1);
                    s_WarpHist[data] = count;
                } while (s_WarpHist[data] != count);
            }

        #else

            #define TAG_MASK 0xFFFFFFFFU

            __forceinline__ __device__ void addByte(uint* s_WarpHist, uint data, uint threadTag)
            {
                atomicAdd(s_WarpHist + data, 1);
            }

        #endif

        __forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
        {
            uint x = pos_x << 2;

            if (x + 0 < cols) addByte(s_WarpHist, (data >>  0) & 0xFFU, tag);
            if (x + 1 < cols) addByte(s_WarpHist, (data >>  8) & 0xFFU, tag);
            if (x + 2 < cols) addByte(s_WarpHist, (data >> 16) & 0xFFU, tag);
            if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
        }

        __global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
        {
            //Per-warp subhistogram storage
            __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
            uint* s_WarpHist = s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;

            //Clear shared memory storage for current threadblock before processing
            #pragma unroll
            for (uint i = 0; i < (HISTOGRAM256_THREADBLOCK_MEMORY / HISTOGRAM256_THREADBLOCK_SIZE); i++)
                s_Hist[threadIdx.x + i * HISTOGRAM256_THREADBLOCK_SIZE] = 0;

            //Cycle through the entire data set, update subhistograms for each warp
            const uint tag = threadIdx.x << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);

            __syncthreads();
            const uint colsui = d_Data.step / sizeof(uint);
            for (uint pos = blockIdx.x * blockDim.x + threadIdx.x; pos < dataCount; pos += blockDim.x * gridDim.x)
            {
                uint pos_y = pos / colsui;
                uint pos_x = pos % colsui;
                uint data = d_Data.ptr(pos_y)[pos_x];
                addWord(s_WarpHist, data, tag, pos_x, cols);
            }

            //Merge per-warp histograms into per-block and write to global memory
            __syncthreads();
            for (uint bin = threadIdx.x; bin < HISTOGRAM256_BIN_COUNT; bin += HISTOGRAM256_THREADBLOCK_SIZE)
            {
                uint sum = 0;

                for (uint i = 0; i < WARP_COUNT; i++)
                    sum += s_Hist[bin + i * HISTOGRAM256_BIN_COUNT] & TAG_MASK;

                d_PartialHistograms[blockIdx.x * HISTOGRAM256_BIN_COUNT + bin] = sum;
            }
        }

        ////////////////////////////////////////////////////////////////////////////////
        // Merge histogram256() output
        // Run one threadblock per bin; each threadblock adds up the same bin counter
        // from every partial histogram. Reads are uncoalesced, but mergeHistogram256
        // takes only a fraction of total processing time
        ////////////////////////////////////////////////////////////////////////////////

        __global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
        {
            uint sum = 0;

            #pragma unroll
            for (uint i = threadIdx.x; i < PARTIAL_HISTOGRAM256_COUNT; i += MERGE_THREADBLOCK_SIZE)
                sum += d_PartialHistograms[blockIdx.x + i * HISTOGRAM256_BIN_COUNT];

            __shared__ uint data[MERGE_THREADBLOCK_SIZE];
            data[threadIdx.x] = sum;

            for (uint stride = MERGE_THREADBLOCK_SIZE / 2; stride > 0; stride >>= 1)
            {
                __syncthreads();
                if (threadIdx.x < stride)
                    data[threadIdx.x] += data[threadIdx.x + stride];
            }

            if (threadIdx.x == 0)
                d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
        }

        void histogram256_gpu(PtrStepSzb src, int* hist, uint* buf, cudaStream_t stream)
        {
            histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
                PtrStepSz<uint>(src),
                buf,
                static_cast<uint>(src.rows * src.step / sizeof(uint)),
                src.cols);

            cudaSafeCall( cudaGetLastError() );

            mergeHistogram256<<<HISTOGRAM256_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(buf, hist);

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

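        // Validation note: because histogram256 above walks the image as
        // 32-bit words spread across per-warp subhistograms, a scalar
        // reference is handy when checking it. The helper below is a
        // hypothetical host-side reference (invented name, not part of this
        // commit); it counts only the first `cols` bytes of each `step`-byte
        // padded row, mirroring the x + k < cols guards in addWord.
        static void histogram256_ref(const uchar* data, size_t step, int rows, int cols, int histOut[256])
        {
            for (int i = 0; i < 256; ++i)
                histOut[i] = 0;
            for (int y = 0; y < rows; ++y)
                for (int x = 0; x < cols; ++x)
                    ++histOut[data[y * step + x]];
        }
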
        __constant__ int c_lut[256];

        __global__ void equalizeHist(const PtrStepSzb src, PtrStepb dst)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < src.cols && y < src.rows)
            {
                const uchar val = src.ptr(y)[x];
                const int lut = c_lut[val];
                dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
            }
        }

        void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
        {
            dim3 block(16, 16);
            dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

            cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );

            equalizeHist<<<grid, block, 0, stream>>>(src, dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    } // namespace hist
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
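The equalizeHist kernel above looks each pixel up in a cumulative-count LUT and applies the 255 / (cols * rows) scale itself, so the 256-entry table passed to equalizeHist_gpu holds raw cumulative histogram counts. A hedged host-side sketch of building such a table (build_equalize_lut is an invented name, and the exact cumulative variant OpenCV's host code produces may differ):

    // Cumulative histogram as consumed by the equalizeHist kernel: lut[v] is
    // the number of pixels with value <= v; the kernel maps it onto [0, 255].
    static void build_equalize_lut(const int hist[256], int lut[256])
    {
        int cum = 0;
        for (int i = 0; i < 256; ++i)
        {
            cum += hist[i];
            lut[i] = cum;
        }
    }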
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,100 +1,100 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_internal_shared_HPP__
#define __OPENCV_internal_shared_HPP__

#include <cuda_runtime.h>
#include <npp.h>
#include "NPP_staging.hpp"
#include "opencv2/gpu/devmem2d.hpp"
#include "safe_call.hpp"
#include "opencv2/gpu/device/common.hpp"

namespace cv { namespace gpu
{
    enum
    {
        BORDER_REFLECT101_GPU = 0,
        BORDER_REPLICATE_GPU,
        BORDER_CONSTANT_GPU,
        BORDER_REFLECT_GPU,
        BORDER_WRAP_GPU
    };

    class NppStreamHandler
    {
    public:
        inline explicit NppStreamHandler(cudaStream_t newStream = 0)
        {
            oldStream = nppGetStream();
            nppSetStream(newStream);
        }

        inline ~NppStreamHandler()
        {
            nppSetStream(oldStream);
        }

    private:
        cudaStream_t oldStream;
    };

    class NppStStreamHandler
    {
    public:
        inline explicit NppStStreamHandler(cudaStream_t newStream = 0)
        {
            oldStream = nppStSetActiveCUDAstream(newStream);
        }

        inline ~NppStStreamHandler()
        {
            nppStSetActiveCUDAstream(oldStream);
        }

    private:
        cudaStream_t oldStream;
    };
}}

#endif /* __OPENCV_internal_shared_HPP__ */
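Both handler classes above are small RAII guards: the constructor swaps in the caller's CUDA stream for subsequent NPP (or NPP_staging) calls, and the destructor restores the previous one, so the redirection cannot leak past the enclosing scope even on early return. A minimal usage sketch (enqueueNppWork is an invented name, and the NPP call is a placeholder, not a real entry point):

    void enqueueNppWork(cudaStream_t userStream)
    {
        cv::gpu::NppStreamHandler h(userStream);  // nppSetStream(userStream)

        // ... NPP calls issued here are queued on userStream ...
        // nppiSomeOperation(...);                // placeholder only

    }   // destructor restores the previously active NPP stream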
File diff suppressed because it is too large
@@ -1,217 +1,217 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace mathfunc
    {
        //////////////////////////////////////////////////////////////////////////////////////
        // Cart <-> Polar

        struct Nothing
        {
            static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
            {
            }
        };
        struct Magnitude
        {
            static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
            {
                dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
            }
        };
        struct MagnitudeSqr
        {
            static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
            {
                dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
            }
        };
        struct Atan2
        {
            static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
            {
                float angle = ::atan2f(y_data, x_data);
                angle += (angle < 0) * 2.0 * CV_PI;
                dst[y * dst_step + x] = scale * angle;
            }
        };
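        // Note on Atan2::calc above: ::atan2f returns angles in (-pi, pi],
        // and the expression angle += (angle < 0) * 2.0 * CV_PI folds the
        // usual "if negative, add 2*pi" branch into arithmetic, since the
        // comparison converts to 0 or 1 and only shifts the negative half,
        // yielding angles in [0, 2*pi). A host-side restatement of the
        // identity (wrap_0_2pi is an invented name, illustration only):
        static float wrap_0_2pi(float a)
        {
            // (a < 0) is 0.0f or 1.0f, so only negative angles are shifted;
            // e.g. wrap_0_2pi(-CV_PI / 2) == 3 * CV_PI / 2.
            return a + (a < 0) * 2.0f * (float)CV_PI;
        }
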
template <typename Mag, typename Angle>
|
||||
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
|
||||
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < width && y < height)
|
||||
{
|
||||
float x_data = xptr[y * x_step + x];
|
||||
float y_data = yptr[y * y_step + x];
|
||||
|
||||
Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
|
||||
Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
|
||||
}
|
||||
}
|
||||
|
||||
        struct NonEmptyMag
        {
            static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
            {
                return mag[y * mag_step + x];
            }
        };
        struct EmptyMag
        {
            static __device__ __forceinline__ float get(const float*, size_t, int, int)
            {
                return 1.0f;
            }
        };
        template <typename Mag>
        __global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
                                    float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < width && y < height)
            {
                float mag_data = Mag::get(mag, mag_step, x, y);
                float angle_data = angle[y * angle_step + x];
                float sin_a, cos_a;

                ::sincosf(scale * angle_data, &sin_a, &cos_a);

                xptr[y * x_step + x] = mag_data * cos_a;
                yptr[y * y_step + x] = mag_data * sin_a;
            }
        }

        template <typename Mag, typename Angle>
        void cartToPolar_caller(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
        {
            dim3 threads(32, 8, 1);
            dim3 grid(1, 1, 1);

            grid.x = divUp(x.cols, threads.x);
            grid.y = divUp(x.rows, threads.y);

            const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;

            cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
                x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
                mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
        {
            typedef void (*caller_t)(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
            static const caller_t callers[2][2][2] =
            {
                {
                    {
                        cartToPolar_caller<Magnitude, Atan2>,
                        cartToPolar_caller<Magnitude, Nothing>
                    },
                    {
                        cartToPolar_caller<MagnitudeSqr, Atan2>,
                        cartToPolar_caller<MagnitudeSqr, Nothing>,
                    }
                },
                {
                    {
                        cartToPolar_caller<Nothing, Atan2>,
                        cartToPolar_caller<Nothing, Nothing>
                    },
                    {
                        cartToPolar_caller<Nothing, Atan2>,
                        cartToPolar_caller<Nothing, Nothing>,
                    }
                }
            };

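            // Dispatch is indexed as [mag output absent][squared magnitude][angle output absent];
            // when mag.data is 0, the Nothing functor is selected for both magnitude variants.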
            callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
        }

        template <typename Mag>
        void polarToCart_caller(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
        {
            dim3 threads(32, 8, 1);
            dim3 grid(1, 1, 1);

            grid.x = divUp(mag.cols, threads.x);
            grid.y = divUp(mag.rows, threads.y);

            const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;

            polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
                angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
        {
            typedef void (*caller_t)(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
            static const caller_t callers[2] =
            {
                polarToCart_caller<NonEmptyMag>,
                polarToCart_caller<EmptyMag>
            };

            callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
        }
    } // namespace mathfunc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
File diff suppressed because it is too large
@@ -74,12 +74,12 @@ namespace cv { namespace gpu { namespace device

            const int i = blockDim.y * blockIdx.y + threadIdx.y;
            const int j = blockDim.x * blockIdx.x + threadIdx.x;

            if (j >= dst.cols || i >= dst.rows)
                return;

            int bsize = search_radius + block_radius;
            int search_window = 2 * search_radius + 1;
            float minus_search_window2_inv = -1.f/(search_window * search_window);

            value_type sum1 = VecTraits<value_type>::all(0);
@@ -87,16 +87,16 @@ namespace cv { namespace gpu { namespace device

            if (j - bsize >= 0 && j + bsize < dst.cols && i - bsize >= 0 && i + bsize < dst.rows)
            {
                for(float y = -search_radius; y <= search_radius; ++y)
                    for(float x = -search_radius; x <= search_radius; ++x)
                    {
                        float dist2 = 0;
                        for(float ty = -block_radius; ty <= block_radius; ++ty)
                            for(float tx = -block_radius; tx <= block_radius; ++tx)
                            {
                                value_type bv = saturate_cast<value_type>(src(i + y + ty, j + x + tx));
                                value_type av = saturate_cast<value_type>(src(i + ty, j + tx));

                                dist2 += norm2(av - bv);
                            }

@@ -119,10 +119,10 @@ namespace cv { namespace gpu { namespace device
                            for(float tx = -block_radius; tx <= block_radius; ++tx)
                            {
                                value_type bv = saturate_cast<value_type>(b.at(i + y + ty, j + x + tx, src));
                                value_type av = saturate_cast<value_type>(b.at(i + ty, j + tx, src));
                                dist2 += norm2(av - bv);
                            }

                        float w = __expf(dist2 * noise_mult + (x * x + y * y) * minus_search_window2_inv);

                        sum1 = sum1 + w * saturate_cast<value_type>(b.at(i + y, j + x, src));
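                        // noise_mult and minus_search_window2_inv are both negative, so the weight
                        // w = __expf(...) above decays with patch dissimilarity and with spatial
                        // distance from the reference pixel - the standard non-local means weighting.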
@@ -144,9 +144,9 @@ namespace cv { namespace gpu { namespace device
            B<T> b(src.rows, src.cols);

            int block_window = 2 * block_radius + 1;
            float minus_h2_inv = -1.f/(h * h * VecTraits<T>::cn);
            float noise_mult = minus_h2_inv/(block_window * block_window);

            cudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
            nlm_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, search_radius, block_radius, noise_mult);
            cudaSafeCall ( cudaGetLastError () );
@@ -160,7 +160,7 @@ namespace cv { namespace gpu { namespace device
        {
            typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb dst, int search_radius, int block_radius, float h, cudaStream_t stream);

            static func_t funcs[] =
            {
                nlm_caller<T, BrdReflect101>,
                nlm_caller<T, BrdReplicate>,
@@ -183,7 +183,7 @@ namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        __device__ __forceinline__ int calcDist(const uchar& a, const uchar& b) { return (a-b)*(a-b); }
        __device__ __forceinline__ int calcDist(const uchar2& a, const uchar2& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y); }
        __device__ __forceinline__ int calcDist(const uchar3& a, const uchar3& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y) + (a.z-b.z)*(a.z-b.z); }
@@ -193,7 +193,7 @@ namespace cv { namespace gpu { namespace device
            enum
            {
                CTA_SIZE = 128,

                TILE_COLS = 128,
                TILE_ROWS = 32,

@@ -217,7 +217,7 @@ namespace cv { namespace gpu { namespace device

            PtrStep<T> src;
            mutable PtrStepi buffer;

            __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
            {
                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
@@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace device
                        int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx));

                        dist_sums[index] += dist;
                        col_sums(tx + block_radius, index) += dist;
                    }
#endif

@@ -265,9 +265,9 @@ namespace cv { namespace gpu { namespace device
            }

            __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
            {
                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

@@ -280,8 +280,8 @@ namespace cv { namespace gpu { namespace device
                    int col_sum = 0;

                    for (int ty = -block_radius; ty <= block_radius; ++ty)
                        col_sum += calcDist(src(ay + ty, ax), src(by + ty, bx));

                    dist_sums[index] += col_sum - col_sums(first, index);

                    col_sums(first, index) = col_sum;
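                    // Sliding-window update: when the comparison window shifts one column right,
                    // only the column leaving the window (col_sums(first, index)) is subtracted
                    // and the freshly computed column is added, avoiding a full recomputation.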
@@ -298,7 +298,7 @@ namespace cv { namespace gpu { namespace device
                T a_down = src(ay + block_radius, ax);

                for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
                {
                    int y = index / search_window;
                    int x = index - y * search_window;

@@ -309,9 +309,9 @@ namespace cv { namespace gpu { namespace device
                    T b_down = src(by + block_radius, bx);

                    int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up);

                    dist_sums[index] += col_sum - col_sums(first, index);
                    col_sums(first, index) = col_sum;
                    up_col_sums(j, index) = col_sum;
                }
            }
@@ -339,14 +339,14 @@ namespace cv { namespace gpu { namespace device

                    sum = sum + weight * saturate_cast<sum_type>(src(sy + y, sx + x));
                }

                volatile __shared__ float cta_buffer[CTA_SIZE];

                int tid = threadIdx.x;

                cta_buffer[tid] = weights_sum;
                __syncthreads();
                Block::reduce<CTA_SIZE>(cta_buffer, plus());
                weights_sum = cta_buffer[0];

                __syncthreads();
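                // Every one of the CTA_SIZE threads deposits its partial weight sum in shared
                // memory and Block::reduce folds them; the trailing __syncthreads() protects
                // cta_buffer before it is reused for each sum component below.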
@@ -356,7 +356,7 @@ namespace cv { namespace gpu { namespace device
                {
                    cta_buffer[tid] = reinterpret_cast<float*>(&sum)[n];
                    __syncthreads();
                    Block::reduce<CTA_SIZE>(cta_buffer, plus());
                    reinterpret_cast<float*>(&sum)[n] = cta_buffer[0];

                    __syncthreads();
@@ -388,7 +388,7 @@ namespace cv { namespace gpu { namespace device

                for (int i = tby; i < tey; ++i)
                    for (int j = tbx; j < tex; ++j)
                    {
                        __syncthreads();

                        if (j == tbx)
@@ -397,7 +397,7 @@ namespace cv { namespace gpu { namespace device
                            first = 0;
                        }
                        else
                        {
                            if (i == tby)
                                shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums);
                            else
@@ -407,7 +407,7 @@ namespace cv { namespace gpu { namespace device
                        }

                        __syncthreads();

                        convolve_window(i, j, dist_sums, col_sums, up_col_sums, dst(i, j));
                    }
            }
@@ -418,7 +418,7 @@ namespace cv { namespace gpu { namespace device
        __global__ void fast_nlm_kernel(const FastNonLocalMenas<T> fnlm, PtrStepSz<T> dst) { fnlm(dst); }

        void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows)
        {
            typedef FastNonLocalMenas<uchar> FNLM;
            dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));

@@ -434,13 +434,13 @@ namespace cv { namespace gpu { namespace device
            FNLM fnlm(search_window, block_window, h);

            fnlm.src = (PtrStepSz<T>)src;
            fnlm.buffer = buffer;

            dim3 block(FNLM::CTA_SIZE, 1);
            dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));
            int smem = search_window * search_window * sizeof(int);

            fast_nlm_kernel<<<grid, block, smem>>>(fnlm, (PtrStepSz<T>)dst);
            cudaSafeCall ( cudaGetLastError () );
            if (stream == 0)
@@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace device
        template void nlm_fast_gpu<uchar2>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
        template void nlm_fast_gpu<uchar3>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);

        __global__ void fnlm_split_kernel(const PtrStepSz<uchar3> lab, PtrStepb l, PtrStep<uchar2> ab)
        {
@@ -462,10 +462,10 @@ namespace cv { namespace gpu { namespace device
            {
                uchar3 p = lab(y, x);
                ab(y,x) = make_uchar2(p.y, p.z);
                l(y,x) = p.x;
            }
        }

        void fnlm_split_channels(const PtrStepSz<uchar3>& lab, PtrStepb l, PtrStep<uchar2> ab, cudaStream_t stream)
        {
            dim3 b(32, 8);
@@ -485,7 +485,7 @@ namespace cv { namespace gpu { namespace device
            if (x < lab.cols && y < lab.rows)
            {
                uchar2 p = ab(y, x);
                lab(y, x) = make_uchar3(l(y, x), p.x, p.y);
            }
        }

@@ -1,220 +1,220 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/gpu/device/common.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace optical_flow
    {
        #define NEEDLE_MAP_SCALE 16
        #define NUM_VERTS_PER_ARROW 6

        __global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
        {
            __shared__ float smem[2 * NEEDLE_MAP_SCALE];

            volatile float* u_col_sum = smem;
            volatile float* v_col_sum = u_col_sum + NEEDLE_MAP_SCALE;

            const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x;
            const int y = blockIdx.y * NEEDLE_MAP_SCALE;

            u_col_sum[threadIdx.x] = 0;
            v_col_sum[threadIdx.x] = 0;

            #pragma unroll
            for(int i = 0; i < NEEDLE_MAP_SCALE; ++i)
            {
                u_col_sum[threadIdx.x] += u(::min(y + i, u.rows - 1), x);
                v_col_sum[threadIdx.x] += v(::min(y + i, u.rows - 1), x);
            }

            if (threadIdx.x < 8)
            {
                // now add the column sums
                const uint X = threadIdx.x;

                u_col_sum[X] += u_col_sum[X + 8]; // fold columns 8..15 onto 0..7
                v_col_sum[X] += v_col_sum[X + 8];

                if (X < 4)
                {
                    u_col_sum[X] += u_col_sum[X + 4];
                    v_col_sum[X] += v_col_sum[X + 4];
                }

                if (X < 2)
                {
                    u_col_sum[X] += u_col_sum[X + 2];
                    v_col_sum[X] += v_col_sum[X + 2];
                }

                if (X == 0)
                {
                    u_col_sum[X] += u_col_sum[X + 1];
                    v_col_sum[X] += v_col_sum[X + 1];
                }
            }

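            // Note: the reduction above replaces the original "if (X | 0xfe == 0xfe)" tests,
            // which parse as "X | (0xfe == 0xfe)" and are always true; a descending-stride
            // tree reduction over the 16 column sums is what the averaging below expects.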
            if (threadIdx.x == 0)
            {
                const float coeff = 1.0f / (NEEDLE_MAP_SCALE * NEEDLE_MAP_SCALE);

                u_col_sum[0] *= coeff;
                v_col_sum[0] *= coeff;

                u_avg(blockIdx.y, blockIdx.x) = u_col_sum[0];
                v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0];
            }
        }

        void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg)
        {
            const dim3 block(NEEDLE_MAP_SCALE);
            const dim3 grid(u_avg.cols, u_avg.rows);

            NeedleMapAverageKernel<<<grid, block>>>(u, v, u_avg, v_avg);
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );
        }

        __global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
        {
            // test - just draw a triangle at each pixel
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            const float arrow_x = x * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;
            const float arrow_y = y * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;

            float3 v[NUM_VERTS_PER_ARROW];

            if (x < u_avg.cols && y < u_avg.rows)
            {
                const float u_avg_val = u_avg(y, x);
                const float v_avg_val = v_avg(y, x);

                const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI;

                float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
                r = fmin(14.0f * (r / max_flow), 14.0f);
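                // Each arrow is six vertices: v[0]/v[5] sit at the cell centre, v[2]/v[3] at the
                // tip (length proportional to flow magnitude, capped at 14 px), and v[1]/v[4] are
                // offset perpendicular to the flow direction to give the needle its base width.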

                v[0].z = 1.0f;
                v[1].z = 0.7f;
                v[2].z = 0.7f;
                v[3].z = 0.7f;
                v[4].z = 0.7f;
                v[5].z = 1.0f;

                v[0].x = arrow_x;
                v[0].y = arrow_y;
                v[5].x = arrow_x;
                v[5].y = arrow_y;

                v[2].x = arrow_x + r * ::cosf(theta);
                v[2].y = arrow_y + r * ::sinf(theta);
                v[3].x = v[2].x;
                v[3].y = v[2].y;

                r = ::fmin(r, 2.5f);

                v[1].x = arrow_x + r * ::cosf(theta - CV_PI / 2.0f);
                v[1].y = arrow_y + r * ::sinf(theta - CV_PI / 2.0f);

                v[4].x = arrow_x + r * ::cosf(theta + CV_PI / 2.0f);
                v[4].y = arrow_y + r * ::sinf(theta + CV_PI / 2.0f);

                int indx = (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3;

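                // color_data shares vertex_data's indexing: the hue angle in degrees is written
                // at each vertex's first component slot before indx advances over x, y and z.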
                color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
                vertex_data[indx++] = v[0].x * xscale;
                vertex_data[indx++] = v[0].y * yscale;
                vertex_data[indx++] = v[0].z;

                color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
                vertex_data[indx++] = v[1].x * xscale;
                vertex_data[indx++] = v[1].y * yscale;
                vertex_data[indx++] = v[1].z;

                color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
                vertex_data[indx++] = v[2].x * xscale;
                vertex_data[indx++] = v[2].y * yscale;
                vertex_data[indx++] = v[2].z;

                color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
                vertex_data[indx++] = v[3].x * xscale;
                vertex_data[indx++] = v[3].y * yscale;
                vertex_data[indx++] = v[3].z;

                color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
                vertex_data[indx++] = v[4].x * xscale;
                vertex_data[indx++] = v[4].y * yscale;
                vertex_data[indx++] = v[4].z;

                color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
                vertex_data[indx++] = v[5].x * xscale;
                vertex_data[indx++] = v[5].y * yscale;
                vertex_data[indx++] = v[5].z;
            }
        }

        void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
        {
            const dim3 block(16);
            const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));

            NeedleMapVertexKernel<<<grid, block>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale);
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    }
}}}

#endif /* CUDA_DISABLER */
@@ -1,422 +1,422 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/

#if !defined CUDA_DISABLER

#include <thrust/sort.h>

#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace orb
    {
        ////////////////////////////////////////////////////////////////////////////////////////////////////////
        // cull

        int cull_gpu(int* loc, float* response, int size, int n_points)
        {
            thrust::device_ptr<int> loc_ptr(loc);
            thrust::device_ptr<float> response_ptr(response);

            thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());

            return n_points;
        }

        ////////////////////////////////////////////////////////////////////////////////////////////////////////
        // HarrisResponses

        __global__ void HarrisResponses(const PtrStepb img, const short2* loc_, float* response, const int npoints, const int blockSize, const float harris_k)
        {
            __shared__ int smem[8 * 32];

            volatile int* srow = smem + threadIdx.y * blockDim.x;

            const int ptidx = blockIdx.x * blockDim.y + threadIdx.y;

            if (ptidx < npoints)
            {
                const short2 loc = loc_[ptidx];

                const int r = blockSize / 2;
                const int x0 = loc.x - r;
                const int y0 = loc.y - r;

                int a = 0, b = 0, c = 0;

                for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x)
                {
                    const int i = ind / blockSize;
                    const int j = ind % blockSize;

                    int Ix = (img(y0 + i, x0 + j + 1) - img(y0 + i, x0 + j - 1)) * 2 +
                             (img(y0 + i - 1, x0 + j + 1) - img(y0 + i - 1, x0 + j - 1)) +
                             (img(y0 + i + 1, x0 + j + 1) - img(y0 + i + 1, x0 + j - 1));

                    int Iy = (img(y0 + i + 1, x0 + j) - img(y0 + i - 1, x0 + j)) * 2 +
                             (img(y0 + i + 1, x0 + j - 1) - img(y0 + i - 1, x0 + j - 1)) +
                             (img(y0 + i + 1, x0 + j + 1) - img(y0 + i - 1, x0 + j + 1));

                    a += Ix * Ix;
                    b += Iy * Iy;
                    c += Ix * Iy;
                }

                reduce<32>(srow, a, threadIdx.x, plus<volatile int>());
                reduce<32>(srow, b, threadIdx.x, plus<volatile int>());
                reduce<32>(srow, c, threadIdx.x, plus<volatile int>());

                if (threadIdx.x == 0)
                {
                    float scale = (1 << 2) * blockSize * 255.0f;
                    scale = 1.0f / scale;
                    const float scale_sq_sq = scale * scale * scale * scale;

                    response[ptidx] = ((float)a * b - (float)c * c - harris_k * ((float)a + b) * ((float)a + b)) * scale_sq_sq;
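                    // Harris measure: det(M) - k * trace(M)^2 for the gradient covariance
                    // matrix M = [a c; c b], scaled by scale_sq_sq = 1 / (4 * blockSize * 255)^4.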
                }
            }
        }

        void HarrisResponses_gpu(PtrStepSzb img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream)
        {
            dim3 block(32, 8);

            dim3 grid;
            grid.x = divUp(npoints, block.y);

            HarrisResponses<<<grid, block, 0, stream>>>(img, loc, response, npoints, blockSize, harris_k);

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        ////////////////////////////////////////////////////////////////////////////////////////////////////////
        // IC_Angle

        __constant__ int c_u_max[32];

        void loadUMax(const int* u_max, int count)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_u_max, u_max, count * sizeof(int)) );
        }

        __global__ void IC_Angle(const PtrStepb image, const short2* loc_, float* angle, const int npoints, const int half_k)
        {
            __shared__ int smem[8 * 32];

            volatile int* srow = smem + threadIdx.y * blockDim.x;

            const int ptidx = blockIdx.x * blockDim.y + threadIdx.y;

            if (ptidx < npoints)
            {
                int m_01 = 0, m_10 = 0;

                const short2 loc = loc_[ptidx];

                // Treat the center line differently, v=0
                for (int u = threadIdx.x - half_k; u <= half_k; u += blockDim.x)
                    m_10 += u * image(loc.y, loc.x + u);

                reduce<32>(srow, m_10, threadIdx.x, plus<volatile int>());

                for (int v = 1; v <= half_k; ++v)
                {
                    // Proceed over the two lines
                    int v_sum = 0;
                    int m_sum = 0;
                    const int d = c_u_max[v];

                    for (int u = threadIdx.x - d; u <= d; u += blockDim.x)
                    {
                        int val_plus = image(loc.y + v, loc.x + u);
                        int val_minus = image(loc.y - v, loc.x + u);

                        v_sum += (val_plus - val_minus);
                        m_sum += u * (val_plus + val_minus);
                    }

                    reduce<32>(srow, v_sum, threadIdx.x, plus<volatile int>());
                    reduce<32>(srow, m_sum, threadIdx.x, plus<volatile int>());

                    m_10 += m_sum;
                    m_01 += v * v_sum;
                }

                if (threadIdx.x == 0)
                {
                    float kp_dir = ::atan2f((float)m_01, (float)m_10);
                    kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
                    kp_dir *= 180.0f / CV_PI;
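                    // Orientation by intensity centroid: kp_dir is the angle of the moment
                    // vector (m_10, m_01), wrapped into [0, 2*pi) and converted to degrees.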

                    angle[ptidx] = kp_dir;
                }
            }
        }

        void IC_Angle_gpu(PtrStepSzb image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream)
        {
            dim3 block(32, 8);

            dim3 grid;
            grid.x = divUp(npoints, block.y);

            IC_Angle<<<grid, block, 0, stream>>>(image, loc, angle, npoints, half_k);

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        ////////////////////////////////////////////////////////////////////////////////////////////////////////
        // computeOrbDescriptor

        template <int WTA_K> struct OrbDescriptor;

        #define GET_VALUE(idx) \
            img(loc.y + __float2int_rn(pattern_x[idx] * sina + pattern_y[idx] * cosa), \
                loc.x + __float2int_rn(pattern_x[idx] * cosa - pattern_y[idx] * sina))
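        // GET_VALUE samples pattern point idx after rotating it by the keypoint orientation
        // (sina/cosa), which is what makes the resulting descriptor rotation-invariant.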

        template <> struct OrbDescriptor<2>
        {
            __device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
            {
                pattern_x += 16 * i;
                pattern_y += 16 * i;

                int t0, t1, val;

                t0 = GET_VALUE(0); t1 = GET_VALUE(1);
                val = t0 < t1;

                t0 = GET_VALUE(2); t1 = GET_VALUE(3);
                val |= (t0 < t1) << 1;

                t0 = GET_VALUE(4); t1 = GET_VALUE(5);
                val |= (t0 < t1) << 2;

                t0 = GET_VALUE(6); t1 = GET_VALUE(7);
                val |= (t0 < t1) << 3;

                t0 = GET_VALUE(8); t1 = GET_VALUE(9);
                val |= (t0 < t1) << 4;

                t0 = GET_VALUE(10); t1 = GET_VALUE(11);
                val |= (t0 < t1) << 5;

                t0 = GET_VALUE(12); t1 = GET_VALUE(13);
                val |= (t0 < t1) << 6;

                t0 = GET_VALUE(14); t1 = GET_VALUE(15);
                val |= (t0 < t1) << 7;

                return val;
            }
        };

        template <> struct OrbDescriptor<3>
        {
            __device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
            {
                pattern_x += 12 * i;
                pattern_y += 12 * i;

                int t0, t1, t2, val;

                t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
                val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);

                t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
                val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;

                t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
                val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;

                t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
                val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;

                return val;
            }
        };

        template <> struct OrbDescriptor<4>
        {
            __device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
            {
                pattern_x += 16 * i;
                pattern_y += 16 * i;

                int t0, t1, t2, t3, k, val;
                int a, b;

                t0 = GET_VALUE(0); t1 = GET_VALUE(1);
                t2 = GET_VALUE(2); t3 = GET_VALUE(3);
                a = 0, b = 2;
                if( t1 > t0 ) t0 = t1, a = 1;
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val = k;

                t0 = GET_VALUE(4); t1 = GET_VALUE(5);
                t2 = GET_VALUE(6); t3 = GET_VALUE(7);
                a = 0, b = 2;
                if( t1 > t0 ) t0 = t1, a = 1;
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val |= k << 2;

                t0 = GET_VALUE(8); t1 = GET_VALUE(9);
                t2 = GET_VALUE(10); t3 = GET_VALUE(11);
                a = 0, b = 2;
                if( t1 > t0 ) t0 = t1, a = 1;
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val |= k << 4;

                t0 = GET_VALUE(12); t1 = GET_VALUE(13);
                t2 = GET_VALUE(14); t3 = GET_VALUE(15);
                a = 0, b = 2;
                if( t1 > t0 ) t0 = t1, a = 1;
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val |= k << 6;

                return val;
            }
        };

        #undef GET_VALUE

        template <int WTA_K>
        __global__ void computeOrbDescriptor(const PtrStepb img, const short2* loc, const float* angle_, const int npoints,
                                             const int* pattern_x, const int* pattern_y, PtrStepb desc, int dsize)
        {
            const int descidx = blockIdx.x * blockDim.x + threadIdx.x;
            const int ptidx = blockIdx.y * blockDim.y + threadIdx.y;

            if (ptidx < npoints && descidx < dsize)
            {
                float angle = angle_[ptidx];
                angle *= (float)(CV_PI / 180.f);

                float sina, cosa;
                ::sincosf(angle, &sina, &cosa);

                desc.ptr(ptidx)[descidx] = OrbDescriptor<WTA_K>::calc(img, loc[ptidx], pattern_x, pattern_y, sina, cosa, descidx);
            }
        }

        void computeOrbDescriptor_gpu(PtrStepb img, const short2* loc, const float* angle, const int npoints,
                                      const int* pattern_x, const int* pattern_y, PtrStepb desc, int dsize, int WTA_K, cudaStream_t stream)
        {
            dim3 block(32, 8);

            dim3 grid;
            grid.x = divUp(dsize, block.x);
            grid.y = divUp(npoints, block.y);

switch (WTA_K)
|
||||
{
|
||||
case 2:
|
||||
computeOrbDescriptor<2><<<grid, block, 0, stream>>>(img, loc, angle, npoints, pattern_x, pattern_y, desc, dsize);
|
||||
break;
|
||||
|
||||
case 3:
|
||||
computeOrbDescriptor<3><<<grid, block, 0, stream>>>(img, loc, angle, npoints, pattern_x, pattern_y, desc, dsize);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
computeOrbDescriptor<4><<<grid, block, 0, stream>>>(img, loc, angle, npoints, pattern_x, pattern_y, desc, dsize);
|
||||
break;
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// mergeLocation
|
||||
|
||||
__global__ void mergeLocation(const short2* loc_, float* x, float* y, const int npoints, float scale)
|
||||
{
|
||||
const int ptidx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (ptidx < npoints)
|
||||
{
|
||||
short2 loc = loc_[ptidx];
|
||||
|
||||
x[ptidx] = loc.x * scale;
|
||||
y[ptidx] = loc.y * scale;
|
||||
}
|
||||
}
|
||||
|
||||
void mergeLocation_gpu(const short2* loc, float* x, float* y, int npoints, float scale, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(256);
|
||||
|
||||
dim3 grid;
|
||||
grid.x = divUp(npoints, block.x);
|
||||
|
||||
mergeLocation<<<grid, block, 0, stream>>>(loc, x, y, npoints, scale);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
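Editor's note: the OrbDescriptor specializations above implement ORB's WTA_K modes — WTA_K == 2 packs eight pairwise intensity tests into each descriptor byte, while WTA_K == 3 and 4 pack four 2-bit winner indices from 3- and 4-way comparisons. A minimal host-side sketch of the WTA_K == 2 bit packing, using made-up sample values in place of the rotated-pattern fetches that GET_VALUE performs on the device:

// Illustration only (not part of the diff): how one descriptor byte is packed
// for WTA_K == 2. The pairs array is hypothetical stand-in data for the
// rotated BRIEF pattern samples fetched by GET_VALUE on the device.
#include <cstdio>

static int packByteWta2(const int pairs[16])
{
    int val = 0;
    for (int j = 0; j < 8; ++j)                 // 8 comparisons -> 8 bits
        val |= (pairs[2 * j] < pairs[2 * j + 1]) << j;
    return val;
}

int main()
{
    const int pairs[16] = { 10, 20, 30, 5, 7, 7, 1, 2,
                            9, 3, 4, 8, 6, 6, 0, 1 };
    std::printf("descriptor byte = 0x%02x\n", packByteWta2(pairs));
    return 0;
}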
@@ -1,228 +1,228 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t;

            __shared__ work_t smem[256 + 4];

            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y;

            const int src_y = 2 * y;

            if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2)
            {
                {
                    work_t sum;

                    sum =       0.0625f * src(src_y - 2, x);
                    sum = sum + 0.25f   * src(src_y - 1, x);
                    sum = sum + 0.375f  * src(src_y    , x);
                    sum = sum + 0.25f   * src(src_y + 1, x);
                    sum = sum + 0.0625f * src(src_y + 2, x);

                    smem[2 + threadIdx.x] = sum;
                }

                if (threadIdx.x < 2)
                {
                    const int left_x = x - 2;

                    work_t sum;

                    sum =       0.0625f * src(src_y - 2, left_x);
                    sum = sum + 0.25f   * src(src_y - 1, left_x);
                    sum = sum + 0.375f  * src(src_y    , left_x);
                    sum = sum + 0.25f   * src(src_y + 1, left_x);
                    sum = sum + 0.0625f * src(src_y + 2, left_x);

                    smem[threadIdx.x] = sum;
                }

                if (threadIdx.x > 253)
                {
                    const int right_x = x + 2;

                    work_t sum;

                    sum =       0.0625f * src(src_y - 2, right_x);
                    sum = sum + 0.25f   * src(src_y - 1, right_x);
                    sum = sum + 0.375f  * src(src_y    , right_x);
                    sum = sum + 0.25f   * src(src_y + 1, right_x);
                    sum = sum + 0.0625f * src(src_y + 2, right_x);

                    smem[4 + threadIdx.x] = sum;
                }
            }
            else
            {
                {
                    work_t sum;

                    sum =       0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
                    sum = sum + 0.25f   * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
                    sum = sum + 0.375f  * src(src_y                    , b.idx_col_high(x));
                    sum = sum + 0.25f   * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));

                    smem[2 + threadIdx.x] = sum;
                }

                if (threadIdx.x < 2)
                {
                    const int left_x = x - 2;

                    work_t sum;

                    sum =       0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
                    sum = sum + 0.25f   * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
                    sum = sum + 0.375f  * src(src_y                    , b.idx_col(left_x));
                    sum = sum + 0.25f   * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));

                    smem[threadIdx.x] = sum;
                }

                if (threadIdx.x > 253)
                {
                    const int right_x = x + 2;

                    work_t sum;

                    sum =       0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
                    sum = sum + 0.25f   * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
                    sum = sum + 0.375f  * src(src_y                    , b.idx_col_high(right_x));
                    sum = sum + 0.25f   * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
                    sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));

                    smem[4 + threadIdx.x] = sum;
                }
            }

            __syncthreads();

            if (threadIdx.x < 128)
            {
                const int tid2 = threadIdx.x * 2;

                work_t sum;

                sum =       0.0625f * smem[2 + tid2 - 2];
                sum = sum + 0.25f   * smem[2 + tid2 - 1];
                sum = sum + 0.375f  * smem[2 + tid2    ];
                sum = sum + 0.25f   * smem[2 + tid2 + 1];
                sum = sum + 0.0625f * smem[2 + tid2 + 2];

                const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;

                if (dst_x < dst_cols)
                    dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
            }
        }

        template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
        {
            const dim3 block(256);
            const dim3 grid(divUp(src.cols, block.x), dst.rows);

            B<T> b(src.rows, src.cols);

            pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
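Editor's note: pyrDown above is a separable blur-and-decimate. Each block filters one destination row vertically with the 5-tap binomial kernel {1, 4, 6, 4, 1}/16 into shared memory (with a two-column apron), then filters horizontally and keeps every second column. A 1-D sketch of the same arithmetic, with made-up data and the reflect-101 border rule that the instantiations select via BrdReflect101:

// Illustration only: the 5-tap binomial kernel used by pyrDown, applied to a
// 1-D signal with reflect-101 borders and 2:1 decimation. Mirrors the device
// arithmetic (weights 1/16, 4/16, 6/16, 4/16, 1/16); data here is made up.
#include <cstdio>
#include <vector>

static int reflect101(int p, int n)          // BrdReflect101-style indexing
{
    if (p < 0)  p = -p;
    if (p >= n) p = 2 * n - 2 - p;
    return p;
}

int main()
{
    const float w[5] = { 0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f };
    const std::vector<float> src = { 0, 1, 2, 3, 4, 5, 6, 7 };

    for (int x = 0; x < (int)src.size(); x += 2)   // keep every second sample
    {
        float sum = 0.f;
        for (int k = -2; k <= 2; ++k)
            sum += w[k + 2] * src[reflect101(x + k, (int)src.size())];
        std::printf("%g ", sum);
    }
    std::printf("\n");
    return 0;
}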
@@ -1,196 +1,196 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            __shared__ sum_t s_srcPatch[10][10];
            __shared__ sum_t s_dstPatch[20][16];

            if (threadIdx.x < 10 && threadIdx.y < 10)
            {
                int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
                int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;

                srcx = ::abs(srcx);
                srcx = ::min(src.cols - 1, srcx);

                srcy = ::abs(srcy);
                srcy = ::min(src.rows - 1, srcy);

                s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
            }

            __syncthreads();

            sum_t sum = VecTraits<sum_t>::all(0);

            const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
            const int oddFlag  = static_cast<int>((threadIdx.x & 1) != 0);
            const bool eveny = ((threadIdx.y & 1) == 0);
            const int tidx = threadIdx.x;

            if (eveny)
            {
                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
                sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx    ) >> 1)];
                sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
                sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
            }

            s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;

            if (threadIdx.y < 2)
            {
                sum = VecTraits<sum_t>::all(0);

                if (eveny)
                {
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
                    sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx    ) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
                }

                s_dstPatch[threadIdx.y][threadIdx.x] = sum;
            }

            if (threadIdx.y > 13)
            {
                sum = VecTraits<sum_t>::all(0);

                if (eveny)
                {
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
                    sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx    ) >> 1)];
                    sum = sum + ( oddFlag * 0.25f  ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
                    sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
                }

                s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
            }

            __syncthreads();

            sum = VecTraits<sum_t>::all(0);

            const int tidy = threadIdx.y;

            sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
            sum = sum + 0.25f   * s_dstPatch[2 + tidy - 1][threadIdx.x];
            sum = sum + 0.375f  * s_dstPatch[2 + tidy    ][threadIdx.x];
            sum = sum + 0.25f   * s_dstPatch[2 + tidy + 1][threadIdx.x];
            sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];

            if (x < dst.cols && y < dst.rows)
                dst(y, x) = saturate_cast<T>(4.0f * sum);
        }

        template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
        {
            const dim3 block(16, 16);
            const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

            pyrUp<<<grid, block, 0, stream>>>(src, dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
        {
            pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        //template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

        template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        //template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
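Editor's note: pyrUp above computes the classic zero-stuffed upsample followed by the same binomial smoothing; the even/odd flags zero out taps that land on inserted samples, and the final 4.0f gain restores brightness (a factor of 2 per dimension). A 1-D analogue with made-up data and a crude in-bounds check standing in for the shared-memory apron:

// Illustration only: 1-D analogue of pyrUp. Zero-stuff the signal 2x, smooth
// with the same binomial kernel, and scale by 2 (the device code's 2-D
// equivalent scales by 4). Input values are made up.
#include <cstdio>
#include <vector>

int main()
{
    const float w[5] = { 0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f };
    const std::vector<float> src = { 1, 2, 3, 4 };

    std::vector<float> up(src.size() * 2, 0.f);    // zeros at odd positions
    for (size_t i = 0; i < src.size(); ++i)
        up[2 * i] = src[i];

    for (int x = 0; x < (int)up.size(); ++x)
    {
        float sum = 0.f;
        for (int k = -2; k <= 2; ++k)
        {
            int p = x + k;
            if (p >= 0 && p < (int)up.size())      // crude border for the sketch
                sum += w[k + 2] * up[p];
        }
        std::printf("%g ", 2.0f * sum);            // 2x gain per dimension
    }
    std::printf("\n");
    return 0;
}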
File diff suppressed because it is too large
Load Diff
@@ -1,274 +1,274 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float xcoo = mapx.ptr(y)[x];
                const float ycoo = mapy.ptr(y)[x];

                dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
            }
        }

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
        {
            static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, int)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        #define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
            texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
            struct tex_remap_ ## type ## _reader \
            { \
                typedef type elem_type; \
                typedef int index_type; \
                int xoff, yoff; \
                tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
                { \
                    return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
                } \
            }; \
            template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
                    PtrStepSz< type > dst, const float* borderValue, int cc) \
                { \
                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
                    dim3 block(32, cc >= 20 ? 8 : 4); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_remap_ ## type , srcWhole); \
                    tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                    B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
                    BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
                    Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            }; \
            template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
            { \
                static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
                    PtrStepSz< type > dst, const float*, int) \
                { \
                    dim3 block(32, 8); \
                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                    bindTexture(&tex_remap_ ## type , srcWhole); \
                    tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                    if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                    { \
                        Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                    } \
                    else \
                    { \
                        BrdReplicate<type> brd(src.rows, src.cols); \
                        BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
                        Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                    } \
                    cudaSafeCall( cudaGetLastError() ); \
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            };

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_REMAP_TEX

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
            {
                if (stream == 0)
                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc);
                else
                    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
            }
        };

        template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
            PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);

            static const caller_t callers[3][5] =
            {
                {
                    RemapDispatcher<PointFilter, BrdReflect101, T>::call,
                    RemapDispatcher<PointFilter, BrdReplicate, T>::call,
                    RemapDispatcher<PointFilter, BrdConstant, T>::call,
                    RemapDispatcher<PointFilter, BrdReflect, T>::call,
                    RemapDispatcher<PointFilter, BrdWrap, T>::call
                },
                {
                    RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
                    RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
                    RemapDispatcher<LinearFilter, BrdConstant, T>::call,
                    RemapDispatcher<LinearFilter, BrdReflect, T>::call,
                    RemapDispatcher<LinearFilter, BrdWrap, T>::call
                },
                {
                    RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
                    RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
                    RemapDispatcher<CubicFilter, BrdConstant, T>::call,
                    RemapDispatcher<CubicFilter, BrdReflect, T>::call,
                    RemapDispatcher<CubicFilter, BrdWrap, T>::call
                }
            };

            callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
                static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
        }

        template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float xcoo = mapx.ptr(y)[x];
                const float ycoo = mapy.ptr(y)[x];

                dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
            }
        }
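
        // Each thread gathers exactly one destination pixel: (xcoo, ycoo) is a
        // (possibly fractional) source coordinate read from the map images, and
        // the Ptr2D functor hides both border handling and interpolation. As a
        // hedged sketch of what a LinearFilter-wrapped source evaluates to for
        // a fractional coordinate (p(row, col) is illustrative shorthand for a
        // raw pixel fetch, not a name used in this file):
        //
        //   src(5.25f, 2.75f) ~= 0.75f * 0.25f * p(5, 2) + 0.75f * 0.75f * p(5, 3)
        //                      + 0.25f * 0.25f * p(6, 2) + 0.25f * 0.75f * p(6, 3)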

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
        {
            static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, int)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
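
        // Design note: the non-stream dispatcher launches on the default stream
        // and calls cudaDeviceSynchronize() before returning, giving blocking
        // semantics; the stream dispatcher only validates the launch and leaves
        // completion to the caller's cudaStream_t. The texture specializations
        // below exist only for the non-stream path, since binding a legacy
        // texture reference mutates global state and cannot safely be
        // interleaved across concurrent streams.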

#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
        texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
        struct tex_remap_ ## type ## _reader \
        { \
            typedef type elem_type; \
            typedef int index_type; \
            int xoff, yoff; \
            tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
            __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
            { \
                return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
            } \
        }; \
        template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
        { \
            static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
                PtrStepSz< type > dst, const float* borderValue, int cc) \
            { \
                typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
                dim3 block(32, cc >= 20 ? 8 : 4); \
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                bindTexture(&tex_remap_ ## type , srcWhole); \
                tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
                BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
                Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
                remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                cudaSafeCall( cudaGetLastError() ); \
                cudaSafeCall( cudaDeviceSynchronize() ); \
            } \
        }; \
        template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
        { \
            static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
                PtrStepSz< type > dst, const float*, int) \
            { \
                dim3 block(32, 8); \
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                bindTexture(&tex_remap_ ## type , srcWhole); \
                tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
                if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                { \
                    Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                } \
                else \
                { \
                    BrdReplicate<type> brd(src.rows, src.cols); \
                    BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
                    Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
                } \
                cudaSafeCall( cudaGetLastError() ); \
                cudaSafeCall( cudaDeviceSynchronize() ); \
            } \
        };
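
        // For reference, a hand-expanded sketch of what the macro above
        // generates for uchar (abridged; the real code is produced by the
        // preprocessor):
        //
        //   texture<uchar, cudaTextureType2D> tex_remap_uchar(0, cudaFilterModePoint, cudaAddressModeClamp);
        //   struct tex_remap_uchar_reader
        //   {
        //       typedef uchar elem_type;
        //       typedef int index_type;
        //       int xoff, yoff;
        //       __device__ __forceinline__ uchar operator ()(int y, int x) const
        //       {
        //           return tex2D(tex_remap_uchar, x + xoff, y + yoff);
        //       }
        //   };
        //
        // Each element type thus gets its own global texture reference plus a
        // reader functor, and RemapDispatcherNonStream is partially specialized
        // to route that type through the texture path.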

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)

        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_REMAP_TEX

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
            {
                if (stream == 0)
                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc);
                else
                    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
            }
        };

        template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
            PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);

            static const caller_t callers[3][5] =
            {
                {
                    RemapDispatcher<PointFilter, BrdReflect101, T>::call,
                    RemapDispatcher<PointFilter, BrdReplicate, T>::call,
                    RemapDispatcher<PointFilter, BrdConstant, T>::call,
                    RemapDispatcher<PointFilter, BrdReflect, T>::call,
                    RemapDispatcher<PointFilter, BrdWrap, T>::call
                },
                {
                    RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
                    RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
                    RemapDispatcher<LinearFilter, BrdConstant, T>::call,
                    RemapDispatcher<LinearFilter, BrdReflect, T>::call,
                    RemapDispatcher<LinearFilter, BrdWrap, T>::call
                },
                {
                    RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
                    RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
                    RemapDispatcher<CubicFilter, BrdConstant, T>::call,
                    RemapDispatcher<CubicFilter, BrdReflect, T>::call,
                    RemapDispatcher<CubicFilter, BrdWrap, T>::call
                }
            };

            callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
                static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
        }

        template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
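
// A hedged host-side sketch of driving remap_gpu (not part of this commit;
// the buffer setup and the trailing compute-capability value are
// illustrative assumptions):
//
//   cv::gpu::GpuMat src(480, 640, CV_8UC1), dst(480, 640, CV_8UC1);
//   cv::gpu::GpuMat xmap(480, 640, CV_32FC1), ymap(480, 640, CV_32FC1);
//   // fill xmap/ymap with per-pixel source coordinates, then:
//   // interpolation index: 0 = point, 1 = linear, 2 = cubic (rows of callers)
//   // border index: 0 = reflect101, 1 = replicate, 2 = constant, 3 = reflect, 4 = wrap
//   float borderValue[4] = {0, 0, 0, 0};
//   cv::gpu::device::imgproc::remap_gpu<uchar>(src, src, 0, 0, xmap, ymap, dst,
//                                              1, 1, borderValue, 0, 20);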
@@ -1,302 +1,302 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
#include <cfloat>
#include <opencv2/gpu/device/scan.hpp>

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float xcoo = x * fx;
                const float ycoo = y * fy;

                dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
            }
        }

        template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                dst(y, x) = saturate_cast<T>(src(y, x));
            }
        }
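
        // Note the asymmetry between the two kernels: resize() maps each
        // destination pixel back to a scaled source coordinate itself, while
        // resize_area() passes (y, x) through unchanged because the
        // AreaFilter/IntegerAreaFilter functor wrapped around src was already
        // constructed with fx and fy and performs the box averaging internally.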

        template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
        {
            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                BrdReplicate<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);

                resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
        {
            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                BrdConstant<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
                AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );
                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
        {
            static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
                BrdConstant<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
                IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
                resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );
                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                BrdReplicate<T> brd(src.rows, src.cols);
                BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

                resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
        texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
        struct tex_resize_ ## type ## _reader \
        { \
            typedef type elem_type; \
            typedef int index_type; \
            const int xoff; \
            const int yoff; \
            __host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
            __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
            { \
                return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
            } \
        }; \
        template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
        { \
            static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
            { \
                dim3 block(32, 8); \
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                bindTexture(&tex_resize_ ## type, srcWhole); \
                tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
                if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                { \
                    Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
                    resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
                } \
                else \
                { \
                    BrdReplicate< type > brd(src.rows, src.cols); \
                    BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
                    Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
                    resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
                } \
                cudaSafeCall( cudaGetLastError() ); \
                cudaSafeCall( cudaDeviceSynchronize() ); \
            } \
        };

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
        //OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)

        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
        OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)

        #undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX

        template <template <typename> class Filter, typename T> struct ResizeDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                if (stream == 0)
                    ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
                else
                    ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
            }
        };

        template <typename T> struct ResizeDispatcher<AreaFilter, T>
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
            {
                (void)srcWhole;
                (void)xoff;
                (void)yoff;
                int iscale_x = (int)round(fx);
                int iscale_y = (int)round(fy);

                if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
                    ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
                else
                    ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
            }
        };
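
        // Worked example of the dispatch above (illustrative values): for a 4x
        // downscale fx = fy = 4.0f, round() yields 4 and |fx - 4| == 0, so the
        // exact IntegerAreaFilter path runs; for fx = 2.5f the difference is
        // 0.5 and the general AreaFilter path runs instead. Note that FLT_MIN
        // is the smallest positive normalized float, so the comparison acts as
        // an exact-integer test rather than an epsilon tolerance.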

        template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
            PtrStepSzb dst, int interpolation, cudaStream_t stream)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);

            static const caller_t callers[4] =
            {
                ResizeDispatcher<PointFilter, T>::call,
                ResizeDispatcher<LinearFilter, T>::call,
                ResizeDispatcher<CubicFilter, T>::call,
                ResizeDispatcher<AreaFilter, T>::call
            };
            // change to linear if area interpolation is requested for upscaling
            if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
                interpolation = 1;

            callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
                static_cast< PtrStepSz<T> >(dst), stream);
        }

        template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        //template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        //template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        //template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
        template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);

        template<typename T> struct scan_traits{};

        template<> struct scan_traits<uchar>
        {
            typedef float scan_line_type;
        };

    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device

#endif /* CUDA_DISABLER */
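
// A hedged host-side sketch of calling resize_gpu (illustrative only; note
// that fx and fy are dst-to-src scale factors here, so 0.5f means a 2x
// upscale, which is also why area interpolation falls back to linear when
// fx <= 1 or fy <= 1):
//
//   cv::gpu::GpuMat src(480, 640, CV_8UC3), dst(960, 1280, CV_8UC3);
//   // interpolation index: 0 = point, 1 = linear, 2 = cubic, 3 = area
//   cv::gpu::device::imgproc::resize_gpu<uchar3>(src, src, 0, 0,
//                                                0.5f, 0.5f, dst, 1, 0);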
@@ -1,175 +1,175 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/gpu/device/common.hpp"
|
||||
#include "opencv2/gpu/device/vec_traits.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace video_encoding
|
||||
{
|
||||
__device__ __forceinline__ void rgbtoy(const uchar b, const uchar g, const uchar r, uchar& y)
|
||||
{
|
||||
y = static_cast<uchar>(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void rgbtoyuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v)
|
||||
{
|
||||
rgbtoy(b, g, r, y);
|
||||
u = static_cast<uchar>(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100);
|
||||
v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
|
||||
}
|
||||
|
||||
__global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst)
|
||||
{
|
||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
||||
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
||||
|
||||
if (x + 1 >= src.cols || y + 1 >= src.rows)
|
||||
return;
|
||||
|
||||
// get pointers to the data
|
||||
const size_t planeSize = src.rows * dst.step;
|
||||
PtrStepb y_plane(dst.data, dst.step);
|
||||
PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
|
||||
PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
|
||||
|
||||
uchar pix;
|
||||
uchar y_val, u_val, v_val;
|
||||
|
||||
pix = src(y, x);
|
||||
rgbtoy(pix, pix, pix, y_val);
|
||||
y_plane(y, x) = y_val;
|
||||
|
||||
pix = src(y, x + 1);
|
||||
rgbtoy(pix, pix, pix, y_val);
|
||||
y_plane(y, x + 1) = y_val;
|
||||
|
||||
pix = src(y + 1, x);
|
||||
rgbtoy(pix, pix, pix, y_val);
|
||||
y_plane(y + 1, x) = y_val;
|
||||
|
||||
pix = src(y + 1, x + 1);
|
||||
rgbtoyuv(pix, pix, pix, y_val, u_val, v_val);
|
||||
y_plane(y + 1, x + 1) = y_val;
|
||||
u_plane(y / 2, x / 2) = u_val;
|
||||
v_plane(y / 2, x / 2) = v_val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void BGR_to_YV12(const PtrStepSz<T> src, PtrStepb dst)
|
||||
{
|
||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
||||
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
||||
|
||||
if (x + 1 >= src.cols || y + 1 >= src.rows)
|
||||
return;
|
||||
|
||||
// get pointers to the data
|
||||
const size_t planeSize = src.rows * dst.step;
|
||||
PtrStepb y_plane(dst.data, dst.step);
|
||||
PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
|
||||
PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
|
||||
|
||||
T pix;
|
||||
uchar y_val, u_val, v_val;
|
||||
|
||||
pix = src(y, x);
|
||||
rgbtoy(pix.z, pix.y, pix.x, y_val);
|
||||
y_plane(y, x) = y_val;
|
||||
|
||||
pix = src(y, x + 1);
|
||||
rgbtoy(pix.z, pix.y, pix.x, y_val);
|
||||
y_plane(y, x + 1) = y_val;
|
||||
|
||||
pix = src(y + 1, x);
|
||||
rgbtoy(pix.z, pix.y, pix.x, y_val);
|
||||
y_plane(y + 1, x) = y_val;
|
||||
|
||||
pix = src(y + 1, x + 1);
|
||||
rgbtoyuv(pix.z, pix.y, pix.x, y_val, u_val, v_val);
|
||||
y_plane(y + 1, x + 1) = y_val;
|
||||
u_plane(y / 2, x / 2) = u_val;
|
||||
v_plane(y / 2, x / 2) = v_val;
|
||||
}
|
||||
|
||||
void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
|
||||
|
||||
Gray_to_YV12<<<grid, block>>>(src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
template <int cn>
|
||||
void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
|
||||
{
|
||||
typedef typename TypeVec<uchar, cn>::vec_type src_t;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
|
||||
|
||||
BGR_to_YV12<<<grid, block>>>(static_cast< PtrStepSz<src_t> >(src), dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)
|
||||
{
|
||||
typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst);
|
||||
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4>
|
||||
};
|
||||
|
||||
funcs[cn](src, dst);
|
||||
}
|
||||
}
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
||||
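The two kernels above share one piece of arithmetic: the destination buffer holds a full-resolution luma plane followed by two quarter-size chroma planes addressed at half the row stride. A minimal host-side sketch of that layout (struct and function names here are illustrative, not part of the commit):

// Illustrative sketch: mirrors the plane arithmetic of Gray_to_YV12 / BGR_to_YV12.
#include <cassert>
#include <cstddef>

struct Yv12Planes
{
    size_t yOffset, uOffset, vOffset; // byte offsets into dst.data
    size_t yStep, chromaStep;         // row strides in bytes
};

inline Yv12Planes yv12Planes(int rows, size_t step)
{
    const size_t planeSize = rows * step;     // "src.rows * dst.step" in the kernels

    Yv12Planes p;
    p.yOffset    = 0;                         // y_plane starts at dst.data
    p.uOffset    = planeSize;                 // u_plane = y_plane.data + planeSize
    p.vOffset    = planeSize + planeSize / 4; // v_plane = u_plane.data + planeSize / 4
    p.yStep      = step;
    p.chromaStep = step / 2;                  // chroma rows use half the stride
    return p;
}

int main()
{
    // 480 rows with a 640-byte stride: chroma planes begin at 307200 and 384000.
    const Yv12Planes p = yv12Planes(480, 640);
    assert(p.uOffset == 307200 && p.vOffset == 384000);
    return 0;
}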
@@ -1,387 +1,387 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace row_filter
    {
        #define MAX_KERNEL_SIZE 32

        __constant__ float c_kernel[MAX_KERNEL_SIZE];

        void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
        {
            if (stream == 0)
                cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
            else
                cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
        }

        template <int KSIZE, typename T, typename D, typename B>
        __global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
        {
            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
                const int BLOCK_DIM_X = 32;
                const int BLOCK_DIM_Y = 8;
                const int PATCH_PER_BLOCK = 4;
                const int HALO_SIZE = 1;
            #else
                const int BLOCK_DIM_X = 32;
                const int BLOCK_DIM_Y = 4;
                const int PATCH_PER_BLOCK = 4;
                const int HALO_SIZE = 1;
            #endif

            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

            __shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];

            const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;

            if (y >= src.rows)
                return;

            const T* src_row = src.ptr(y);

            const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;

            if (blockIdx.x > 0)
            {
                //Load left halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]);
            }
            else
            {
                //Load left halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
            }

            if (blockIdx.x + 2 < gridDim.x)
            {
                //Load main data
                #pragma unroll
                for (int j = 0; j < PATCH_PER_BLOCK; ++j)
                    smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]);

                //Load right halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]);
            }
            else
            {
                //Load main data
                #pragma unroll
                for (int j = 0; j < PATCH_PER_BLOCK; ++j)
                    smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));

                //Load right halo
                #pragma unroll
                for (int j = 0; j < HALO_SIZE; ++j)
                    smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
            }

            __syncthreads();

            #pragma unroll
            for (int j = 0; j < PATCH_PER_BLOCK; ++j)
            {
                const int x = xStart + j * BLOCK_DIM_X;

                if (x < src.cols)
                {
                    sum_t sum = VecTraits<sum_t>::all(0);

                    #pragma unroll
                    for (int k = 0; k < KSIZE; ++k)
                        sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];

                    dst(y, x) = saturate_cast<D>(sum);
                }
            }
        }

        template <int KSIZE, typename T, typename D, template<typename> class B>
        void linearRowFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
        {
            int BLOCK_DIM_X;
            int BLOCK_DIM_Y;
            int PATCH_PER_BLOCK;

            if (cc >= 20)
            {
                BLOCK_DIM_X = 32;
                BLOCK_DIM_Y = 8;
                PATCH_PER_BLOCK = 4;
            }
            else
            {
                BLOCK_DIM_X = 32;
                BLOCK_DIM_Y = 4;
                PATCH_PER_BLOCK = 4;
            }

            const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
            const dim3 grid(divUp(src.cols, BLOCK_DIM_X * PATCH_PER_BLOCK), divUp(src.rows, BLOCK_DIM_Y));

            B<T> brd(src.cols);

            linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        template <typename T, typename D>
        void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);

            static const caller_t callers[5][33] =
            {
                {
                    0,
                    linearRowFilter_caller< 1, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 2, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 3, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 4, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 5, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 6, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 7, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 8, T, D, BrdRowReflect101>,
                    linearRowFilter_caller< 9, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<10, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<11, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<12, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<13, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<14, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<15, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<16, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<17, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<18, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<19, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<20, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<21, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<22, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<23, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<24, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<25, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<26, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<27, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<28, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<29, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<30, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<31, T, D, BrdRowReflect101>,
                    linearRowFilter_caller<32, T, D, BrdRowReflect101>
                },
                {
                    0,
                    linearRowFilter_caller< 1, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 2, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 3, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 4, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 5, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 6, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 7, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 8, T, D, BrdRowReplicate>,
                    linearRowFilter_caller< 9, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<10, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<11, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<12, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<13, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<14, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<15, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<16, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<17, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<18, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<19, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<20, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<21, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<22, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<23, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<24, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<25, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<26, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<27, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<28, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<29, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<30, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<31, T, D, BrdRowReplicate>,
                    linearRowFilter_caller<32, T, D, BrdRowReplicate>
                },
                {
                    0,
                    linearRowFilter_caller< 1, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 2, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 3, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 4, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 5, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 6, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 7, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 8, T, D, BrdRowConstant>,
                    linearRowFilter_caller< 9, T, D, BrdRowConstant>,
                    linearRowFilter_caller<10, T, D, BrdRowConstant>,
                    linearRowFilter_caller<11, T, D, BrdRowConstant>,
                    linearRowFilter_caller<12, T, D, BrdRowConstant>,
                    linearRowFilter_caller<13, T, D, BrdRowConstant>,
                    linearRowFilter_caller<14, T, D, BrdRowConstant>,
                    linearRowFilter_caller<15, T, D, BrdRowConstant>,
                    linearRowFilter_caller<16, T, D, BrdRowConstant>,
                    linearRowFilter_caller<17, T, D, BrdRowConstant>,
                    linearRowFilter_caller<18, T, D, BrdRowConstant>,
                    linearRowFilter_caller<19, T, D, BrdRowConstant>,
                    linearRowFilter_caller<20, T, D, BrdRowConstant>,
                    linearRowFilter_caller<21, T, D, BrdRowConstant>,
                    linearRowFilter_caller<22, T, D, BrdRowConstant>,
                    linearRowFilter_caller<23, T, D, BrdRowConstant>,
                    linearRowFilter_caller<24, T, D, BrdRowConstant>,
                    linearRowFilter_caller<25, T, D, BrdRowConstant>,
                    linearRowFilter_caller<26, T, D, BrdRowConstant>,
                    linearRowFilter_caller<27, T, D, BrdRowConstant>,
                    linearRowFilter_caller<28, T, D, BrdRowConstant>,
                    linearRowFilter_caller<29, T, D, BrdRowConstant>,
                    linearRowFilter_caller<30, T, D, BrdRowConstant>,
                    linearRowFilter_caller<31, T, D, BrdRowConstant>,
                    linearRowFilter_caller<32, T, D, BrdRowConstant>
                },
                {
                    0,
                    linearRowFilter_caller< 1, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 2, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 3, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 4, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 5, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 6, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 7, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 8, T, D, BrdRowReflect>,
                    linearRowFilter_caller< 9, T, D, BrdRowReflect>,
                    linearRowFilter_caller<10, T, D, BrdRowReflect>,
                    linearRowFilter_caller<11, T, D, BrdRowReflect>,
                    linearRowFilter_caller<12, T, D, BrdRowReflect>,
                    linearRowFilter_caller<13, T, D, BrdRowReflect>,
                    linearRowFilter_caller<14, T, D, BrdRowReflect>,
                    linearRowFilter_caller<15, T, D, BrdRowReflect>,
                    linearRowFilter_caller<16, T, D, BrdRowReflect>,
                    linearRowFilter_caller<17, T, D, BrdRowReflect>,
                    linearRowFilter_caller<18, T, D, BrdRowReflect>,
                    linearRowFilter_caller<19, T, D, BrdRowReflect>,
                    linearRowFilter_caller<20, T, D, BrdRowReflect>,
                    linearRowFilter_caller<21, T, D, BrdRowReflect>,
                    linearRowFilter_caller<22, T, D, BrdRowReflect>,
                    linearRowFilter_caller<23, T, D, BrdRowReflect>,
                    linearRowFilter_caller<24, T, D, BrdRowReflect>,
                    linearRowFilter_caller<25, T, D, BrdRowReflect>,
                    linearRowFilter_caller<26, T, D, BrdRowReflect>,
                    linearRowFilter_caller<27, T, D, BrdRowReflect>,
                    linearRowFilter_caller<28, T, D, BrdRowReflect>,
                    linearRowFilter_caller<29, T, D, BrdRowReflect>,
                    linearRowFilter_caller<30, T, D, BrdRowReflect>,
                    linearRowFilter_caller<31, T, D, BrdRowReflect>,
                    linearRowFilter_caller<32, T, D, BrdRowReflect>
                },
                {
                    0,
                    linearRowFilter_caller< 1, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 2, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 3, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 4, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 5, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 6, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 7, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 8, T, D, BrdRowWrap>,
                    linearRowFilter_caller< 9, T, D, BrdRowWrap>,
                    linearRowFilter_caller<10, T, D, BrdRowWrap>,
                    linearRowFilter_caller<11, T, D, BrdRowWrap>,
                    linearRowFilter_caller<12, T, D, BrdRowWrap>,
                    linearRowFilter_caller<13, T, D, BrdRowWrap>,
                    linearRowFilter_caller<14, T, D, BrdRowWrap>,
                    linearRowFilter_caller<15, T, D, BrdRowWrap>,
                    linearRowFilter_caller<16, T, D, BrdRowWrap>,
                    linearRowFilter_caller<17, T, D, BrdRowWrap>,
                    linearRowFilter_caller<18, T, D, BrdRowWrap>,
                    linearRowFilter_caller<19, T, D, BrdRowWrap>,
                    linearRowFilter_caller<20, T, D, BrdRowWrap>,
                    linearRowFilter_caller<21, T, D, BrdRowWrap>,
                    linearRowFilter_caller<22, T, D, BrdRowWrap>,
                    linearRowFilter_caller<23, T, D, BrdRowWrap>,
                    linearRowFilter_caller<24, T, D, BrdRowWrap>,
                    linearRowFilter_caller<25, T, D, BrdRowWrap>,
                    linearRowFilter_caller<26, T, D, BrdRowWrap>,
                    linearRowFilter_caller<27, T, D, BrdRowWrap>,
                    linearRowFilter_caller<28, T, D, BrdRowWrap>,
                    linearRowFilter_caller<29, T, D, BrdRowWrap>,
                    linearRowFilter_caller<30, T, D, BrdRowWrap>,
                    linearRowFilter_caller<31, T, D, BrdRowWrap>,
                    linearRowFilter_caller<32, T, D, BrdRowWrap>
                }
            };

            loadKernel(kernel, ksize, stream);

            callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
        }

        template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
        template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
    } // namespace row_filter
}}} // namespace cv { namespace gpu { namespace device


#endif /* CUDA_DISABLER */
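Each block of linearRowFilter covers BLOCK_DIM_X * PATCH_PER_BLOCK output columns, so the caller sizes the grid with ceiling division. A stand-alone check of that sizing (divUpRef is an assumed stand-in for the library's divUp helper):

#include <cassert>

inline int divUpRef(int total, int grain)
{
    return (total + grain - 1) / grain; // ceiling division, as used for grid sizing
}

int main()
{
    const int cols = 1000, BLOCK_DIM_X = 32, PATCH_PER_BLOCK = 4;
    // each block writes 32 * 4 = 128 columns, so 1000 columns need 8 blocks along x
    assert(divUpRef(cols, BLOCK_DIM_X * PATCH_PER_BLOCK) == 8);
    return 0;
}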
@@ -1,95 +1,95 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
#define __OPENCV_CUDA_SAFE_CALL_HPP__

#include <cuda_runtime_api.h>
#include <cufft.h>
#include <cublas.h>
#include "NCV.hpp"

#if defined(__GNUC__)
    #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
    #define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
    #define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
    #define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
    #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
    #define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
    #define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
    #define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif

namespace cv { namespace gpu
{
    void nppError(int err, const char *file, const int line, const char *func = "");
    void ncvError(int err, const char *file, const int line, const char *func = "");
    void cufftError(int err, const char *file, const int line, const char *func = "");
    void cublasError(int err, const char *file, const int line, const char *func = "");
}}

static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
    if (err < 0)
        cv::gpu::nppError(err, file, line, func);
}

static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
{
    if (NCV_SUCCESS != err)
        cv::gpu::ncvError(err, file, line, func);
}

static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
    if (CUFFT_SUCCESS != err)
        cv::gpu::cufftError(err, file, line, func);
}

static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")
{
    if (CUBLAS_STATUS_SUCCESS != err)
        cv::gpu::cublasError(err, file, line, func);
}

#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
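A hypothetical caller showing how these wrappers are meant to be used: the macro records the call site (plus the enclosing function name under GCC), and any non-success status is routed to the matching cv::gpu error handler.

// Hypothetical usage sketch; planExample is not part of this header,
// which is assumed to be included along with the cuFFT headers.
void planExample(int n)
{
    cufftHandle plan;
    cufftSafeCall( cufftPlan1d(&plan, n, CUFFT_C2C, 1) ); // reports via cv::gpu::cufftError on failure
    cufftSafeCall( cufftDestroy(plan) );
}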
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -56,14 +56,14 @@ namespace cv
        template<class T, enum cudaTextureReadMode readMode>
        TextureBinder(const PtrStepSz<T>& arr, const struct texture<T, 2, readMode>& tex) : texref(&tex)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step) );
        }


        template<class T, enum cudaTextureReadMode readMode>
        TextureBinder(const PtrSz<T>& arr, const struct texture<T, 1, readMode> &tex) : texref(&tex)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture(0, tex, arr.data, desc, arr.size * arr.elemSize()) );
        }

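For reference, cudaCreateChannelDesc<T>() builds the format descriptor that both constructors forward to the bind call. A small stand-alone sketch for T = float (the expected field values are noted as an assumption):

#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    cudaChannelFormatDesc d = cudaCreateChannelDesc<float>();
    // expected for float: x = 32 bits, y = z = w = 0, f = cudaChannelFormatKindFloat
    std::printf("x=%d y=%d z=%d w=%d kind=%d\n", d.x, d.y, d.z, d.w, (int)d.f);
    return 0;
}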
@@ -1,389 +1,389 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        __constant__ float c_warpMat[3 * 3];

        struct AffineTransform
        {
            static __device__ __forceinline__ float2 calcCoord(int x, int y)
            {
                const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
                const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];

                return make_float2(xcoo, ycoo);
            }
        };

        struct PerspectiveTransform
        {
            static __device__ __forceinline__ float2 calcCoord(int x, int y)
            {
                const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);

                const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
                const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);

                return make_float2(xcoo, ycoo);
            }
        };

||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Build Maps
|
||||
|
||||
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < xmap.cols && y < xmap.rows)
|
||||
{
|
||||
const float2 coord = Transform::calcCoord(x, y);
|
||||
|
||||
xmap(y, x) = coord.x;
|
||||
ymap(y, x) = coord.y;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
|
||||
|
||||
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||
|
||||
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
|
||||
}
|
||||
|
||||
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||
|
||||
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Warp
|
||||
|
||||
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
const float2 coord = Transform::calcCoord(x, y);
|
||||
|
||||
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
|
||||
}
|
||||
}
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, int)
|
||||
{
|
||||
(void)xoff;
|
||||
(void)yoff;
|
||||
(void)srcWhole;
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
|
||||
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
|
||||
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
|
||||
texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||
struct tex_warp_ ## type ## _reader \
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, int cc) \
|
||||
{ \
|
||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||
dim3 block(32, cc >= 20 ? 8 : 4); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_warp_ ## type , srcWhole); \
|
||||
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
}; \
|
||||
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
|
||||
{ \
|
||||
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, int) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_warp_ ## type , srcWhole); \
|
||||
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate<type> brd(src.rows, src.cols); \
|
||||
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
|
||||
warp<Transform><<<grid, block>>>(filter_src, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)
|
||||
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
|
||||
//OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
|
||||
OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
|
||||
|
||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
|
||||
{
|
||||
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
|
||||
{
|
||||
if (stream == 0)
|
||||
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc);
|
||||
else
|
||||
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc);
|
||||
}
|
||||
};
|
||||
|
||||
template <class Transform, typename T>
|
||||
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
static const func_t funcs[3][5] =
|
||||
{
|
||||
{
|
||||
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
|
||||
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
|
||||
}
|
||||
};
|
||||
|
||||
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
|
||||
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
|
||||
}
|
||||
|
||||
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||
|
||||
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
|
||||
}
|
||||
|
||||
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||
|
||||
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
|
||||
}
|
||||
|
||||
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace imgproc
    {
        __constant__ float c_warpMat[3 * 3];

        struct AffineTransform
        {
            static __device__ __forceinline__ float2 calcCoord(int x, int y)
            {
                const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
                const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];

                return make_float2(xcoo, ycoo);
            }
        };

        struct PerspectiveTransform
        {
            static __device__ __forceinline__ float2 calcCoord(int x, int y)
            {
                const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);

                const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
                const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);

                return make_float2(xcoo, ycoo);
            }
        };
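        // Note on conventions: c_warpMat holds the matrix row-major, as uploaded by
        // the *_gpu entry points below. Both functors are evaluated at a *destination*
        // pixel (x, y) and yield the *source* coordinate to sample, i.e. the uploaded
        // matrix is the dst->src (inverse) mapping:
        //
        //   affine:      (u, v) = (M[0]x + M[1]y + M[2],  M[3]x + M[4]y + M[5])
        //   perspective: (u', v', w') = M * (x, y, 1)^T,  (u, v) = (u'/w', v'/w')
        //
        // For example, an affine matrix M = [1 0 10; 0 1 5] samples each output pixel
        // from 10 columns right and 5 rows down in the source.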

        ///////////////////////////////////////////////////////////////////
        // Build Maps

        template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < xmap.cols && y < xmap.rows)
            {
                const float2 coord = Transform::calcCoord(x, y);

                xmap(y, x) = coord.x;
                ymap(y, x) = coord.y;
            }
        }

        template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));

            buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );

            buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
        }

        void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );

            buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
        }

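        // Usage sketch (host side, illustrative only; assumes xmap/ymap are
        // pre-allocated CV_32FC1 device maps of the destination size):
        //
        //   float M[3 * 3];   // dst->src perspective matrix, row-major
        //   buildWarpPerspectiveMaps_gpu(M, xmap, ymap, StreamAccessor::getStream(stream));
        //   // xmap/ymap now hold per-pixel source coordinates and can be fed to
        //   // the generic remap kernels.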
        ///////////////////////////////////////////////////////////////////
        // Warp

        template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < dst.cols && y < dst.rows)
            {
                const float2 coord = Transform::calcCoord(x, y);

                dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
            }
        }
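        // The kernel is deliberately generic: Ptr2D is a Filter< BorderReader<...> >
        // composition built by the dispatchers below, so src(coord.y, coord.x)
        // performs the interpolated, border-aware fetch at a fractional coordinate,
        // and saturate_cast clamps the filtered floating-point result back into the
        // destination element type's range.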

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };
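        // Asynchronous path: the kernel is enqueued on the user-supplied stream and
        // no device synchronization is performed here; completion is the caller's
        // responsibility (e.g. via Stream::waitForCompletion).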

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, int)
            {
                (void)xoff;
                (void)yoff;
                (void)srcWhole;

                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
                Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

                warp<Transform><<<grid, block>>>(filter_src, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
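        // Generic synchronous fallback: launches on the default stream and blocks
        // until the kernel finishes. For the element types instantiated further
        // below, the OPENCV_GPU_IMPLEMENT_WARP_TEX specializations replace this
        // with a texture-backed variant that can exploit the cached texture path.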

#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
        texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
        struct tex_warp_ ## type ## _reader \
        { \
            typedef type elem_type; \
            typedef int index_type; \
            int xoff, yoff; \
            tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
            __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
            { \
                return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
            } \
        }; \
        template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
        { \
            static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, int cc) \
            { \
                typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
                dim3 block(32, cc >= 20 ? 8 : 4); \
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                bindTexture(&tex_warp_ ## type , srcWhole); \
                tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
                B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
                BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
                Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
                warp<Transform><<<grid, block>>>(filter_src, dst); \
                cudaSafeCall( cudaGetLastError() ); \
                cudaSafeCall( cudaDeviceSynchronize() ); \
            } \
        }; \
        template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
        { \
            static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, int) \
            { \
                dim3 block(32, 8); \
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
                bindTexture(&tex_warp_ ## type , srcWhole); \
                tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
                if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
                { \
                    Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
                    warp<Transform><<<grid, block>>>(filter_src, dst); \
                } \
                else \
                { \
                    BrdReplicate<type> brd(src.rows, src.cols); \
                    BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
                    Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
                    warp<Transform><<<grid, block>>>(filter_src, dst); \
                } \
                cudaSafeCall( cudaGetLastError() ); \
                cudaSafeCall( cudaDeviceSynchronize() ); \
            } \
        };

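        // Each OPENCV_GPU_IMPLEMENT_WARP_TEX(type) expansion emits a file-scope
        // texture reference, a small reader functor that applies the ROI offset,
        // and two WarpDispatcherNonStream specializations: a generic one that still
        // routes reads through the requested border handler, and a BrdReplicate one
        // that, when the ROI covers the whole image, lets the texture's own clamp
        // addressing mode do the replication and skips the BorderReader entirely.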
        OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)

        //OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)

        OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)

        OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)

        //OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)

        OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
        //OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
        OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)

#undef OPENCV_GPU_IMPLEMENT_WARP_TEX

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
            {
                if (stream == 0)
                    WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc);
                else
                    WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc);
            }
        };

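        // Routing rule: a null stream selects the synchronous (and, where available,
        // texture-backed) path; any other stream selects the purely asynchronous
        // global-memory path, presumably because a single file-scope texture
        // reference cannot be rebound safely while other streams may be using it.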
        template <class Transform, typename T>
        void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
                         int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {
            typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);

            static const func_t funcs[3][5] =
            {
                {
                    WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
                    WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
                },
                {
                    WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
                    WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
                },
                {
                    WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
                    WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
                }
            };

            funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
                                             static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
        }
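        // The table is indexed as funcs[interpolation][borderMode]: rows are
        // point/linear/cubic filtering, columns follow the internal border order
        // (reflect101, replicate, constant, reflect, wrap). Note that this column
        // order differs from the raw cv::BORDER_* constants, so the host-side
        // wrapper is expected to remap the user's border flag before calling in.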

        template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
                                                  int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );

            warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
        }
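        // Illustrative call (host side only; src/srcWhole/dst and the ROI offsets
        // are hypothetical, and cc is the device's compute capability, e.g. 20 for
        // sm_2x, matching the "cc >= 20" test in the texture dispatcher above):
        //
        //   float M[2 * 3] = { 1.f, 0.f, 10.f,     // dst->src affine matrix:
        //                      0.f, 1.f,  5.f };   // sample from (x + 10, y + 5)
        //   float borderVal[4] = { 0.f, 0.f, 0.f, 0.f };
        //   warpAffine_gpu<uchar4>(src, srcWhole, xoff, yoff, M, dst,
        //                          1 /*linear*/, 1 /*replicate*/, borderVal, stream, cc);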

        template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
                                                       int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {
            cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );

            warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
        }

        template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        //template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);

        template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        //template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
        template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
    } // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device


#endif /* CUDA_DISABLER */
@@ -1,253 +1,253 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;

#if defined HAVE_CUDA

struct Stream::Impl
{
    static cudaStream_t getStream(const Impl* impl) { return impl ? impl->stream : 0; }
    cudaStream_t stream;
    int ref_counter;
};

#include "opencv2/gpu/stream_accessor.hpp"

CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
    return Stream::Impl::getStream(stream.impl);
}
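// StreamAccessor is the sanctioned escape hatch: it exposes the raw cudaStream_t
// behind a cv::gpu::Stream so user code can enqueue its own CUDA work on the same
// stream. A null Impl (as used by Stream::Null() below) maps to stream 0.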

#endif /* defined (HAVE_CUDA) */


#if !defined (HAVE_CUDA)

void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); }
cv::gpu::Stream::~Stream() { throw_nogpu(); }
cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); }
Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, CudaMem& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }

#else /* !defined (HAVE_CUDA) */

namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
    void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);
    void setTo(GpuMat& src, Scalar s, cudaStream_t stream);
    void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
}}

namespace
{
    template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
    {
        dst.create(src.size(), src.type());
        size_t bwidth = src.cols * src.elemSize();
        cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) );
    }
}
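// devcopy relies on cudaMemcpy2DAsync so that differently pitched (step-padded)
// 2D buffers can be copied in a single call; bwidth is the payload width of each
// row in bytes, excluding the row padding. Note the copy is only truly
// asynchronous when the host side of the transfer is page-locked memory.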

void cv::gpu::Stream::create()
{
    if (impl)
        release();

    cudaStream_t stream;
    cudaSafeCall( cudaStreamCreate( &stream ) );

    impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl));

    impl->stream = stream;
    impl->ref_counter = 1;
}

void cv::gpu::Stream::release()
{
    if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )
    {
        cudaSafeCall( cudaStreamDestroy( impl->stream ) );
        cv::fastFree( impl );
    }
}

cv::gpu::Stream::Stream() : impl(0) { create(); }
cv::gpu::Stream::~Stream() { release(); }

cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
{
    if( impl )
        CV_XADD(&impl->ref_counter, 1);
}

Stream& cv::gpu::Stream::operator=(const Stream& stream)
{
    if( this != &stream )
    {
        if( stream.impl )
            CV_XADD(&stream.impl->ref_counter, 1);

        release();
        impl = stream.impl;
    }
    return *this;
}

bool cv::gpu::Stream::queryIfComplete()
{
    cudaError_t err = cudaStreamQuery( Impl::getStream(impl) );

    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;

    cudaSafeCall(err);
    return false;
}
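// cudaStreamQuery returns cudaSuccess once all enqueued work has finished and
// cudaErrorNotReady while work is still pending; both are normal outcomes here,
// so only other error codes are funneled into cudaSafeCall.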

void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) ); }

void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
    // dst must already be allocated with matching size and type: if devcopy had to
    // reallocate it, the new buffer would be ordinary pageable memory rather than
    // page-locked memory, and the download would no longer be asynchronous.
    CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
    devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); }

void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)     { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)    { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice); }

void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
        return;
    }
    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
            return;
        }
    }

    setTo(src, s, Impl::getStream(impl));
}
|
||||
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
|
||||
{
|
||||
CV_Assert((src.depth() != CV_64F) ||
|
||||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
|
||||
|
||||
CV_Assert(mask.type() == CV_8UC1);
|
||||
|
||||
setTo(src, val, mask, Impl::getStream(impl));
|
||||
}
|
||||
|
||||
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
|
||||
{
|
||||
CV_Assert((src.depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
|
||||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
|
||||
|
||||
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
|
||||
|
||||
if( rtype < 0 )
|
||||
rtype = src.type();
|
||||
else
|
||||
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());
|
||||
|
||||
int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
|
||||
if( sdepth == ddepth && noScale )
|
||||
{
|
||||
src.copyTo(dst);
|
||||
return;
|
||||
}
|
||||
|
||||
GpuMat temp;
|
||||
const GpuMat* psrc = &src;
|
||||
if( sdepth != ddepth && psrc == &dst )
|
||||
psrc = &(temp = src);
|
||||
|
||||
dst.create( src.size(), rtype );
|
||||
convertTo(src, dst, alpha, beta, Impl::getStream(impl));
|
||||
}
|
||||
|
||||
cv::gpu::Stream::operator bool() const
|
||||
{
|
||||
return impl && impl->stream;
|
||||
}
|
||||
|
||||
cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) {}
|
||||
|
||||
cv::gpu::Stream& cv::gpu::Stream::Null()
|
||||
{
|
||||
static Stream s((Impl*)0);
|
||||
return s;
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//               For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;

#if defined HAVE_CUDA

struct Stream::Impl
{
    static cudaStream_t getStream(const Impl* impl) { return impl ? impl->stream : 0; }
    cudaStream_t stream;
    int ref_counter;
};

#include "opencv2/gpu/stream_accessor.hpp"

CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
    return Stream::Impl::getStream(stream.impl);
}

#endif /* defined HAVE_CUDA */


#if !defined (HAVE_CUDA)

void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); }
cv::gpu::Stream::~Stream() { throw_nogpu(); }
cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); }
Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, CudaMem& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }

#else /* !defined (HAVE_CUDA) */

namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
    void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);
    void setTo(GpuMat& src, Scalar s, cudaStream_t stream);
    void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
}}

namespace
{
    template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
    {
        dst.create(src.size(), src.type());
        size_t bwidth = src.cols * src.elemSize();
        cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) );
    }
}

void cv::gpu::Stream::create()
{
    if (impl)
        release();

    cudaStream_t stream;
    cudaSafeCall( cudaStreamCreate( &stream ) );

    impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl));

    impl->stream = stream;
    impl->ref_counter = 1;
}

void cv::gpu::Stream::release()
{
    if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )
    {
        cudaSafeCall( cudaStreamDestroy( impl->stream ) );
        cv::fastFree( impl );
    }
}

cv::gpu::Stream::Stream() : impl(0) { create(); }
cv::gpu::Stream::~Stream() { release(); }

cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
{
    if( impl )
        CV_XADD(&impl->ref_counter, 1);
}

Stream& cv::gpu::Stream::operator=(const Stream& stream)
{
    if( this != &stream )
    {
        if( stream.impl )
            CV_XADD(&stream.impl->ref_counter, 1);

        release();
        impl = stream.impl;
    }
    return *this;
}

bool cv::gpu::Stream::queryIfComplete()
{
    cudaError_t err = cudaStreamQuery( Impl::getStream(impl) );

    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;

    cudaSafeCall(err);
    return false;
}

void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) ); }

void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
    // dst must already match src (asserted below); if the allocation happened
    // here instead, dst would not point to page-locked memory
    CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
    devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); }

void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice); }

void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
        return;
    }
    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
            return;
        }
    }

    setTo(src, s, Impl::getStream(impl));
}

void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
    CV_Assert((src.depth() != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    CV_Assert(mask.type() == CV_8UC1);

    setTo(src, val, mask, Impl::getStream(impl));
}

void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
{
    CV_Assert((src.depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();

    if( rtype < 0 )
        rtype = src.type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());

    int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
    if( sdepth == ddepth && noScale )
    {
        src.copyTo(dst);
        return;
    }

    GpuMat temp;
    const GpuMat* psrc = &src;
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = src);

    dst.create( src.size(), rtype );
    convertTo(src, dst, alpha, beta, Impl::getStream(impl));
}

cv::gpu::Stream::operator bool() const
{
    return impl && impl->stream;
}

cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) {}

cv::gpu::Stream& cv::gpu::Stream::Null()
{
    static Stream s((Impl*)0);
    return s;
}

#endif /* !defined (HAVE_CUDA) */
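For orientation, the asynchronous API above can be driven as in the following minimal sketch. It is not part of the commit: the public header path opencv2/gpu/gpu.hpp, the CudaMem page-locked allocator, and the enqueueConvert defaults are assumptions, and asyncScaleDemo is a made-up name.

#include <opencv2/gpu/gpu.hpp>

// Queue an upload, a depth conversion, and a download on one CUDA stream;
// only waitForCompletion() blocks the host thread.
void asyncScaleDemo(const cv::Mat& src)
{
    using namespace cv::gpu;

    CudaMem pinnedSrc(src.size(), src.type());              // page-locked host buffers,
    CudaMem pinnedDst(src.size(), CV_32FC(src.channels())); // required for truly async copies

    cv::Mat header = pinnedSrc;   // Mat header over the pinned buffer
    src.copyTo(header);

    GpuMat d_src, d_dst;
    Stream stream;                                             // wraps cudaStreamCreate
    stream.enqueueUpload(pinnedSrc, d_src);                    // H2D, asynchronous
    stream.enqueueConvert(d_src, d_dst, CV_32F, 1.0/255.0, 0.0); // 8U -> 32F in [0,1]
    stream.enqueueDownload(d_dst, pinnedDst);                  // D2H, asynchronous
    stream.waitForCompletion();                                // cudaStreamSynchronize
}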
@@ -1,63 +1,63 @@
#include "cuvid_video_source.h"
#include "cu_safe_call.h"

#if defined(HAVE_CUDA) && !defined(__APPLE__)

cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const std::string& fname)
{
    CUVIDSOURCEPARAMS params;
    std::memset(&params, 0, sizeof(CUVIDSOURCEPARAMS));

    // Fill parameter struct
    params.pUserData = this;                        // will be passed to data handlers
    params.pfnVideoDataHandler = HandleVideoData;   // our local video-handler callback
    params.pfnAudioDataHandler = 0;

    // now create the actual source
    CUresult res = cuvidCreateVideoSource(&videoSource_, fname.c_str(), &params);
    if (res == CUDA_ERROR_INVALID_SOURCE)
        throw std::runtime_error("Unsupported video source");
    cuSafeCall( res );

    CUVIDEOFORMAT vidfmt;
    cuSafeCall( cuvidGetSourceVideoFormat(videoSource_, &vidfmt, 0) );

    format_.codec = static_cast<VideoReader_GPU::Codec>(vidfmt.codec);
    format_.chromaFormat = static_cast<VideoReader_GPU::ChromaFormat>(vidfmt.chroma_format);
    format_.width = vidfmt.coded_width;
    format_.height = vidfmt.coded_height;
}

cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const
{
    return format_;
}

void cv::gpu::detail::CuvidVideoSource::start()
{
    cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Started) );
}

void cv::gpu::detail::CuvidVideoSource::stop()
{
    cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Stopped) );
}

bool cv::gpu::detail::CuvidVideoSource::isStarted() const
{
    return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Started);
}

bool cv::gpu::detail::CuvidVideoSource::hasError() const
{
    return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Error);
}

int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, CUVIDSOURCEDATAPACKET* packet)
{
    CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);

    return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
}

#endif // defined(HAVE_CUDA) && !defined(__APPLE__)
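HandleVideoData above is the usual C-callback trampoline: the library stores `this` in pUserData, and the static function casts it back to reach the real member function. A self-contained sketch of the same pattern, with a hypothetical PacketHandler signature standing in for the cuvid API:

#include <cstdio>

// The C API only knows this signature plus an opaque cookie.
typedef int (*PacketHandler)(void* userData, const unsigned char* data, int size);

struct Parser
{
    int packets;

    int consume(const unsigned char* /*data*/, int size)
    {
        ++packets;
        std::printf("packet of %d bytes\n", size);
        return 1;   // non-zero: keep parsing
    }

    // Static trampoline: cast the cookie back to the owning object and
    // forward to the member function.
    static int handle(void* userData, const unsigned char* data, int size)
    {
        return static_cast<Parser*>(userData)->consume(data, size);
    }
};

int main()
{
    Parser parser = { 0 };
    PacketHandler registered = &Parser::handle;  // what the source stores as its handler
    unsigned char packet[16] = { 0 };
    registered(&parser, packet, sizeof(packet)); // the library would make this call
    return 0;
}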
#include "cuvid_video_source.h"
|
||||
#include "cu_safe_call.h"
|
||||
|
||||
#if defined(HAVE_CUDA) && !defined(__APPLE__)
|
||||
|
||||
cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const std::string& fname)
|
||||
{
|
||||
CUVIDSOURCEPARAMS params;
|
||||
std::memset(¶ms, 0, sizeof(CUVIDSOURCEPARAMS));
|
||||
|
||||
// Fill parameter struct
|
||||
params.pUserData = this; // will be passed to data handlers
|
||||
params.pfnVideoDataHandler = HandleVideoData; // our local video-handler callback
|
||||
params.pfnAudioDataHandler = 0;
|
||||
|
||||
// now create the actual source
|
||||
CUresult res = cuvidCreateVideoSource(&videoSource_, fname.c_str(), ¶ms);
|
||||
if (res == CUDA_ERROR_INVALID_SOURCE)
|
||||
throw std::runtime_error("Unsupported video source");
|
||||
cuSafeCall( res );
|
||||
|
||||
CUVIDEOFORMAT vidfmt;
|
||||
cuSafeCall( cuvidGetSourceVideoFormat(videoSource_, &vidfmt, 0) );
|
||||
|
||||
format_.codec = static_cast<VideoReader_GPU::Codec>(vidfmt.codec);
|
||||
format_.chromaFormat = static_cast<VideoReader_GPU::ChromaFormat>(vidfmt.chroma_format);
|
||||
format_.width = vidfmt.coded_width;
|
||||
format_.height = vidfmt.coded_height;
|
||||
}
|
||||
|
||||
cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const
|
||||
{
|
||||
return format_;
|
||||
}
|
||||
|
||||
void cv::gpu::detail::CuvidVideoSource::start()
|
||||
{
|
||||
cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Started) );
|
||||
}
|
||||
|
||||
void cv::gpu::detail::CuvidVideoSource::stop()
|
||||
{
|
||||
cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Stopped) );
|
||||
}
|
||||
|
||||
bool cv::gpu::detail::CuvidVideoSource::isStarted() const
|
||||
{
|
||||
return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Started);
|
||||
}
|
||||
|
||||
bool cv::gpu::detail::CuvidVideoSource::hasError() const
|
||||
{
|
||||
return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Error);
|
||||
}
|
||||
|
||||
int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, CUVIDSOURCEDATAPACKET* packet)
|
||||
{
|
||||
CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);
|
||||
|
||||
return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
|
||||
}
|
||||
|
||||
#endif // defined(HAVE_CUDA) && !defined(__APPLE__)
|
||||
|
||||
@@ -1,90 +1,90 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//               For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __CUVUD_VIDEO_SOURCE_H__
#define __CUVUD_VIDEO_SOURCE_H__

#include "precomp.hpp"

#if defined(HAVE_CUDA) && !defined(__APPLE__)

namespace cv { namespace gpu
{
    namespace detail
    {
        class CuvidVideoSource : public VideoReader_GPU::VideoSource
        {
        public:
            explicit CuvidVideoSource(const std::string& fname);
            ~CuvidVideoSource() { cuvidDestroyVideoSource(videoSource_); }

            VideoReader_GPU::FormatInfo format() const;
            void start();
            void stop();
            bool isStarted() const;
            bool hasError() const;

        private:
            CuvidVideoSource(const CuvidVideoSource&);
            CuvidVideoSource& operator =(const CuvidVideoSource&);

            // Callback for handling packets of demuxed video data.
            //
            // Parameters:
            //      pUserData - Pointer to user data. We must pass a pointer to a
            //          VideoSourceData struct here, that contains a valid CUvideoparser
            //          and FrameQueue.
            //      pPacket - video-source data packet.
            //
            // NOTE: called from a different thread that does not have a CUDA context
            //
            static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket);

            CUvideosource videoSource_;
            VideoReader_GPU::FormatInfo format_;
        };
    }
}}

#endif // defined(HAVE_CUDA) && !defined(__APPLE__)

#endif // __CUVUD_VIDEO_SOURCE_H__
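The private, unimplemented copy constructor and assignment operator above are the pre-C++11 way to forbid copying an RAII handle owner; a copy would let the destructor run cuvidDestroyVideoSource twice. A minimal, self-contained sketch of the idiom, with a made-up RawHandle API in place of cuvid:

#include <stdexcept>

// Stand-ins for a C handle API (hypothetical, for illustration only).
typedef int* RawHandle;
RawHandle rawCreate()             { return new int(42); }
void      rawDestroy(RawHandle h) { delete h; }

class HandleOwner
{
public:
    HandleOwner() : handle_(rawCreate())
    {
        if (!handle_)
            throw std::runtime_error("failed to create handle");
    }
    ~HandleOwner() { rawDestroy(handle_); }   // released exactly once

private:
    // Declared but never defined: any attempt to copy fails to compile or
    // link, the pre-C++11 equivalent of "= delete".
    HandleOwner(const HandleOwner&);
    HandleOwner& operator =(const HandleOwner&);

    RawHandle handle_;
};

int main()
{
    HandleOwner owner;   // acquires in the constructor, releases in the destructor
    return 0;
}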
@@ -124,7 +124,7 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));

    dst.create(src.size(), src.type());
    func(src, dst, search_window/2, block_window/2, h, gpuBorderType, StreamAccessor::getStream(s));
}
@@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device
        template<typename T>
        void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer,
                          int search_window, int block_window, float h, cudaStream_t stream);

        void fnlm_split_channels(const PtrStepSz<uchar3>& lab, PtrStepb l, PtrStep<uchar2> ab, cudaStream_t stream);
        void fnlm_merge_channels(const PtrStepb& l, const PtrStep<uchar2>& ab, PtrStepSz<uchar3> lab, cudaStream_t stream);
    }
@@ -152,30 +152,30 @@ namespace cv { namespace gpu { namespace device
void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
{
    CV_Assert(src.depth() == CV_8U && src.channels() < 4);

    int border_size = search_window/2 + block_window/2;
    Size esize = src.size() + Size(border_size, border_size) * 2;

    cv::gpu::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
    GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);

    cv::gpu::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
    GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));

    int bcols, brows;
    device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
    buffer.create(brows, bcols, CV_32S);

    using namespace cv::gpu::device::imgproc;
    typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
    static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};

    dst.create(src.size(), src.type());
    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
}

void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
{
#if (CUDA_VERSION < 5000)
    (void)src;
    (void)dst;
@@ -183,14 +183,14 @@ void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat&
    (void)h_color;
    (void)search_window;
    (void)block_window;
    (void)s;

    CV_Error( CV_GpuApiCallError, "Lab method requires CUDA 5.0 or higher" );
#else

    CV_Assert(src.type() == CV_8UC3);

    lab.create(src.size(), src.type());
    cv::gpu::cvtColor(src, lab, CV_BGR2Lab, 0, s);

@@ -201,7 +201,7 @@ void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat&
    l.create(src.size(), CV_8U);
    ab.create(src.size(), CV_8UC2);
    device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));

    simpleMethod(l, l, h_luminance, search_window, block_window, s);
    simpleMethod(ab, ab, h_color, search_window, block_window, s);
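A minimal usage sketch of the denoising class above. It is not from the commit: the header names and default Stream argument are assumptions; the CV_8UC3/BGR expectation follows the assertion in labMethod, and the file names are made up.

#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

int main()
{
    cv::Mat host = cv::imread("input.png");   // labMethod expects CV_8UC3 (BGR)
    if (host.empty())
        return 1;

    cv::gpu::GpuMat d_src(host), d_dst;

    cv::gpu::FastNonLocalMeansDenoising denoiser;
    // Filter strengths for luminance and chroma; 21/7 are the customary
    // search-window / block-window sizes for NLM.
    denoiser.labMethod(d_src, d_dst, 20.0f, 10.0f, 21, 7);

    cv::imwrite("denoised.png", cv::Mat(d_dst));
    return 0;
}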
File diff suppressed because it is too large
@@ -1,249 +1,249 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//               For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;
using namespace std;

#ifdef HAVE_CUDA

namespace
{
    #define error_entry(entry)  { entry, #entry }

    struct ErrorEntry
    {
        int code;
        string str;
    };

    struct ErrorEntryComparer
    {
        int code;
        ErrorEntryComparer(int code_) : code(code_) {}
        bool operator()(const ErrorEntry& e) const { return e.code == code; }
    };

    string getErrorString(int code, const ErrorEntry* errors, size_t n)
    {
        size_t idx = find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;

        const string& msg = (idx != n) ? errors[idx].str : string("Unknown error code");

        ostringstream ostr;
        ostr << msg << " [Code = " << code << "]";

        return ostr.str();
    }

    //////////////////////////////////////////////////////////////////////////
    // NPP errors

    const ErrorEntry npp_errors [] =
    {
        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),

#if defined (_MSC_VER)
        error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif

        error_entry( NPP_BAD_ARG_ERROR ),
        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_TEXTURE_BIND_ERROR ),
        error_entry( NPP_COEFF_ERROR ),
        error_entry( NPP_RECT_ERROR ),
        error_entry( NPP_QUAD_ERROR ),
        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
        error_entry( NPP_INTERPOLATION_ERROR ),
        error_entry( NPP_RESIZE_FACTOR_ERROR ),
        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
        error_entry( NPP_MEMFREE_ERR ),
        error_entry( NPP_MEMSET_ERR ),
        error_entry( NPP_MEMCPY_ERROR ),
        error_entry( NPP_MEM_ALLOC_ERR ),
        error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_MIRROR_FLIP_ERR ),
        error_entry( NPP_INVALID_INPUT ),
        error_entry( NPP_ALIGNMENT_ERROR ),
        error_entry( NPP_STEP_ERROR ),
        error_entry( NPP_SIZE_ERROR ),
        error_entry( NPP_POINTER_ERROR ),
        error_entry( NPP_NULL_POINTER_ERROR ),
        error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
        error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
        error_entry( NPP_ERROR ),
        error_entry( NPP_NO_ERROR ),
        error_entry( NPP_SUCCESS ),
        error_entry( NPP_WARNING ),
        error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
        error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
        error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
        error_entry( NPP_DOUBLE_SIZE_WARNING ),
        error_entry( NPP_ODD_ROI_WARNING )
    };

    const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);

    //////////////////////////////////////////////////////////////////////////
    // NCV errors

    const ErrorEntry ncv_errors [] =
    {
        error_entry( NCV_SUCCESS ),
        error_entry( NCV_UNKNOWN_ERROR ),
        error_entry( NCV_CUDA_ERROR ),
        error_entry( NCV_NPP_ERROR ),
        error_entry( NCV_FILE_ERROR ),
        error_entry( NCV_NULL_PTR ),
        error_entry( NCV_INCONSISTENT_INPUT ),
        error_entry( NCV_TEXTURE_BIND_ERROR ),
        error_entry( NCV_DIMENSIONS_INVALID ),
        error_entry( NCV_INVALID_ROI ),
        error_entry( NCV_INVALID_STEP ),
        error_entry( NCV_INVALID_SCALE ),
        error_entry( NCV_INVALID_SCALE ),
        error_entry( NCV_ALLOCATOR_NOT_INITIALIZED ),
        error_entry( NCV_ALLOCATOR_BAD_ALLOC ),
        error_entry( NCV_ALLOCATOR_BAD_DEALLOC ),
        error_entry( NCV_ALLOCATOR_INSUFFICIENT_CAPACITY ),
        error_entry( NCV_ALLOCATOR_DEALLOC_ORDER ),
        error_entry( NCV_ALLOCATOR_BAD_REUSE ),
        error_entry( NCV_MEM_COPY_ERROR ),
        error_entry( NCV_MEM_RESIDENCE_ERROR ),
        error_entry( NCV_MEM_INSUFFICIENT_CAPACITY ),
        error_entry( NCV_HAAR_INVALID_PIXEL_STEP ),
        error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER ),
        error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE ),
        error_entry( NCV_HAAR_TOO_LARGE_FEATURES ),
        error_entry( NCV_HAAR_XML_LOADING_EXCEPTION ),
        error_entry( NCV_NOIMPL_HAAR_TILTED_FEATURES ),
        error_entry( NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW ),
        error_entry( NPPST_SUCCESS ),
        error_entry( NPPST_ERROR ),
        error_entry( NPPST_CUDA_KERNEL_EXECUTION_ERROR ),
        error_entry( NPPST_NULL_POINTER_ERROR ),
        error_entry( NPPST_TEXTURE_BIND_ERROR ),
        error_entry( NPPST_MEMCPY_ERROR ),
        error_entry( NPPST_MEM_ALLOC_ERR ),
        error_entry( NPPST_MEMFREE_ERR ),
        error_entry( NPPST_INVALID_ROI ),
        error_entry( NPPST_INVALID_STEP ),
        error_entry( NPPST_INVALID_SCALE ),
        error_entry( NPPST_MEM_INSUFFICIENT_BUFFER ),
        error_entry( NPPST_MEM_RESIDENCE_ERROR ),
        error_entry( NPPST_MEM_INTERNAL_ERROR )
    };

    const size_t ncv_error_num = sizeof(ncv_errors) / sizeof(ncv_errors[0]);

    //////////////////////////////////////////////////////////////////////////
    // CUFFT errors

    const ErrorEntry cufft_errors[] =
    {
        error_entry( CUFFT_INVALID_PLAN ),
        error_entry( CUFFT_ALLOC_FAILED ),
        error_entry( CUFFT_INVALID_TYPE ),
        error_entry( CUFFT_INVALID_VALUE ),
        error_entry( CUFFT_INTERNAL_ERROR ),
        error_entry( CUFFT_EXEC_FAILED ),
        error_entry( CUFFT_SETUP_FAILED ),
        error_entry( CUFFT_INVALID_SIZE ),
        error_entry( CUFFT_UNALIGNED_DATA )
    };

    const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);

    //////////////////////////////////////////////////////////////////////////
    // CUBLAS errors

    const ErrorEntry cublas_errors[] =
    {
        error_entry( CUBLAS_STATUS_SUCCESS ),
        error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
        error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
        error_entry( CUBLAS_STATUS_INVALID_VALUE ),
        error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
        error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
        error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
        error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
    };

    const int cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
}

namespace cv
{
    namespace gpu
    {
        void nppError(int code, const char *file, const int line, const char *func)
        {
            string msg = getErrorString(code, npp_errors, npp_error_num);
            cv::gpu::error(msg.c_str(), file, line, func);
        }

        void ncvError(int code, const char *file, const int line, const char *func)
        {
            string msg = getErrorString(code, ncv_errors, ncv_error_num);
            cv::gpu::error(msg.c_str(), file, line, func);
        }

        void cufftError(int code, const char *file, const int line, const char *func)
        {
            string msg = getErrorString(code, cufft_errors, cufft_error_num);
            cv::gpu::error(msg.c_str(), file, line, func);
        }

        void cublasError(int code, const char *file, const int line, const char *func)
        {
            string msg = getErrorString(code, cublas_errors, cublas_error_num);
            cv::gpu::error(msg.c_str(), file, line, func);
        }
    }
}

#endif
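The error_entry macro above relies on the preprocessor's stringizing operator, so each status code is stored beside its own spelling, and getErrorString then does a linear lookup. A self-contained sketch of the same technique, with a made-up Status enum in place of the NPP/NCV codes:

#include <cstddef>
#include <cstdio>

enum Status { STATUS_OK = 0, STATUS_BAD_ARG = -1, STATUS_OOM = -2 };

struct Entry { int code; const char* str; };

// The # operator turns the macro argument into a string literal, so each
// code is stored next to its own name.
#define error_entry(e) { e, #e }

static const Entry entries[] =
{
    error_entry( STATUS_OK ),
    error_entry( STATUS_BAD_ARG ),
    error_entry( STATUS_OOM )
};

const char* statusName(int code)
{
    const std::size_t n = sizeof(entries) / sizeof(entries[0]);
    for (std::size_t i = 0; i < n; ++i)   // linear lookup, as in getErrorString
        if (entries[i].code == code)
            return entries[i].str;
    return "Unknown error code";
}

int main()
{
    std::printf("%s\n", statusName(-2));   // prints STATUS_OOM
    return 0;
}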
@@ -1,177 +1,177 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//               For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::gpu;
using namespace std;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

cv::gpu::FAST_GPU::FAST_GPU(int, bool, double) { throw_nogpu(); }
void cv::gpu::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::release() { throw_nogpu(); }
int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_nogpu(); return 0; }
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat&) { throw_nogpu(); return 0; }

#else /* !defined (HAVE_CUDA) */

cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
    nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
{
}

void cv::gpu::FAST_GPU::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
    if (image.empty())
        return;

    (*this)(image, mask, d_keypoints_);
    downloadKeypoints(d_keypoints_, keypoints);
}

void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
        return;

    Mat h_keypoints(d_keypoints);
    convertKeypoints(h_keypoints, keypoints);
}

void cv::gpu::FAST_GPU::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (h_keypoints.empty())
        return;

    CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);

    int npoints = h_keypoints.cols;

    keypoints.resize(npoints);

    const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
    const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);

    for (int i = 0; i < npoints; ++i)
    {
        KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
        keypoints[i] = kp;
    }
}

void cv::gpu::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{
    calcKeyPointsLocation(img, mask);
    keypoints.cols = getKeyPoints(keypoints);
}

namespace cv { namespace gpu { namespace device
{
    namespace fast
    {
        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
        int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
    }
}}}

int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
{
    using namespace cv::gpu::device::fast;

    CV_Assert(img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));

    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());

    ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);

    if (nonmaxSupression)
    {
        ensureSizeIsEnough(img.size(), CV_32SC1, score_);
        score_.setTo(Scalar::all(0));
    }

    count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSupression ? score_ : PtrStepSzi(), threshold);
    count_ = std::min(count_, maxKeypoints);

    return count_;
}

int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
    using namespace cv::gpu::device::fast;

    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    if (nonmaxSupression)
        return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}

void cv::gpu::FAST_GPU::release()
{
    kpLoc_.release();
    score_.release();

    d_keypoints_.release();
}

#endif /* !defined (HAVE_CUDA) */
@@ -1,182 +1,182 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "ffmpeg_video_source.h"

#if defined(HAVE_CUDA) && !defined(__APPLE__)

#ifdef HAVE_FFMPEG
    #include "cap_ffmpeg_impl.hpp"
#else
    #include "cap_ffmpeg_api.hpp"
#endif

namespace
{
    Create_InputMediaStream_FFMPEG_Plugin create_InputMediaStream_FFMPEG_p = 0;
    Release_InputMediaStream_FFMPEG_Plugin release_InputMediaStream_FFMPEG_p = 0;
    Read_InputMediaStream_FFMPEG_Plugin read_InputMediaStream_FFMPEG_p = 0;
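
    // Resolve the stream entry points: on Windows they are loaded at run time
    // from the versioned opencv_ffmpeg DLL, otherwise they bind directly to the
    // implementations compiled in under HAVE_FFMPEG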
    bool init_MediaStream_FFMPEG()
    {
        static bool initialized = false;

        if (!initialized)
        {
#if defined WIN32 || defined _WIN32
            const char* module_name = "opencv_ffmpeg"
                CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION)
#if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
                "_64"
#endif
                ".dll";

            static HMODULE cvFFOpenCV = LoadLibrary(module_name);

            if (cvFFOpenCV)
            {
                create_InputMediaStream_FFMPEG_p =
                    (Create_InputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "create_InputMediaStream_FFMPEG");
                release_InputMediaStream_FFMPEG_p =
                    (Release_InputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "release_InputMediaStream_FFMPEG");
                read_InputMediaStream_FFMPEG_p =
                    (Read_InputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "read_InputMediaStream_FFMPEG");

                initialized = create_InputMediaStream_FFMPEG_p != 0 && release_InputMediaStream_FFMPEG_p != 0 && read_InputMediaStream_FFMPEG_p != 0;
            }
#elif defined HAVE_FFMPEG
            create_InputMediaStream_FFMPEG_p = create_InputMediaStream_FFMPEG;
            release_InputMediaStream_FFMPEG_p = release_InputMediaStream_FFMPEG;
            read_InputMediaStream_FFMPEG_p = read_InputMediaStream_FFMPEG;

            initialized = true;
#endif
        }

        return initialized;
    }
}

cv::gpu::detail::FFmpegVideoSource::FFmpegVideoSource(const std::string& fname) :
    stream_(0)
{
    CV_Assert( init_MediaStream_FFMPEG() );

    int codec;
    int chroma_format;
    int width;
    int height;

    stream_ = create_InputMediaStream_FFMPEG_p(fname.c_str(), &codec, &chroma_format, &width, &height);
    if (!stream_)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported video source");

    format_.codec = static_cast<VideoReader_GPU::Codec>(codec);
    format_.chromaFormat = static_cast<VideoReader_GPU::ChromaFormat>(chroma_format);
    format_.width = width;
    format_.height = height;
}

cv::gpu::detail::FFmpegVideoSource::~FFmpegVideoSource()
{
    release_InputMediaStream_FFMPEG_p(stream_);
}

cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::FFmpegVideoSource::format() const
{
    return format_;
}

void cv::gpu::detail::FFmpegVideoSource::start()
{
    stop_ = false;
    hasError_ = false;
    thread_.reset(new Thread(readLoop, this));
}

void cv::gpu::detail::FFmpegVideoSource::stop()
{
    stop_ = true;
    thread_->wait();
    thread_.reset();
}

bool cv::gpu::detail::FFmpegVideoSource::isStarted() const
{
    return !stop_;
}

bool cv::gpu::detail::FFmpegVideoSource::hasError() const
{
    return hasError_;
}
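
// Worker thread body: keeps feeding demuxed packets into the video parser
// until end of stream, a read/parse failure, or an explicit stop()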
void cv::gpu::detail::FFmpegVideoSource::readLoop(void* userData)
{
    FFmpegVideoSource* thiz = static_cast<FFmpegVideoSource*>(userData);

    for (;;)
    {
        unsigned char* data;
        int size;
        int endOfFile;

        if (!read_InputMediaStream_FFMPEG_p(thiz->stream_, &data, &size, &endOfFile))
        {
            // A failed read only counts as an error when it isn't a plain end of file
            thiz->hasError_ = !endOfFile;
            break;
        }

        if (!thiz->parseVideoData(data, size))
        {
            thiz->hasError_ = true;
            break;
        }

        if (thiz->stop_)
            break;
    }

    // A null packet tells the parser that the stream has ended
    thiz->parseVideoData(0, 0, true);
}

#endif // HAVE_CUDA

@@ -1,88 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __FFMPEG_VIDEO_SOURCE_H__
#define __FFMPEG_VIDEO_SOURCE_H__

#include "precomp.hpp"
#include "thread_wrappers.h"

#if defined(HAVE_CUDA) && !defined(__APPLE__)

struct InputMediaStream_FFMPEG;

namespace cv { namespace gpu
{
    namespace detail
    {
        class FFmpegVideoSource : public VideoReader_GPU::VideoSource
        {
        public:
            FFmpegVideoSource(const std::string& fname);
            ~FFmpegVideoSource();

            VideoReader_GPU::FormatInfo format() const;
            void start();
            void stop();
            bool isStarted() const;
            bool hasError() const;

        private:
            FFmpegVideoSource(const FFmpegVideoSource&);
            FFmpegVideoSource& operator =(const FFmpegVideoSource&);

            VideoReader_GPU::FormatInfo format_;

            InputMediaStream_FFMPEG* stream_;
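
            // Reader thread and the flags it shares with the control thread;
            // volatile stands in for real synchronization in this pre-C++11 code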
            std::auto_ptr<Thread> thread_;
            volatile bool stop_;
            volatile bool hasError_;

            static void readLoop(void* userData);
        };
    }
}}

#endif // HAVE_CUDA

#endif // __FFMPEG_VIDEO_SOURCE_H__

File diff suppressed because it is too large
Load Diff
@@ -1,117 +1,117 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "frame_queue.h"

#if defined(HAVE_CUDA) && !defined(__APPLE__)

cv::gpu::detail::FrameQueue::FrameQueue() :
    endOfDecode_(0),
    framesInQueue_(0),
    readPosition_(0)
{
    std::memset(displayQueue_, 0, sizeof(displayQueue_));
    std::memset((void*)isFrameInUse_, 0, sizeof(isFrameInUse_));
}
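
// Spin until the display side has released the given decode surface,
// or until decoding has finished (in which case the wait is abandoned)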
bool cv::gpu::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex)
{
    while (isInUse(pictureIndex))
    {
        // Decoder is getting too far ahead of display
        Thread::sleep(1);

        if (isEndOfDecode())
            return false;
    }

    return true;
}

void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
{
    // Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed
    // for display
    isFrameInUse_[picParams->picture_index] = true;

    // Wait until we have a free entry in the display queue (should never block if we have enough entries)
    do
    {
        bool isFramePlaced = false;

        {
            CriticalSection::AutoLock autoLock(criticalSection_);

            if (framesInQueue_ < MaximumSize)
            {
                int writePosition = (readPosition_ + framesInQueue_) % MaximumSize;
                displayQueue_[writePosition] = *picParams;
                framesInQueue_++;
                isFramePlaced = true;
            }
        }

        if (isFramePlaced) // Done
            break;

        // Wait a bit
        Thread::sleep(1);
    } while (!isEndOfDecode());
}
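
// Pop the oldest frame from the circular display queue, if any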
bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
{
    CriticalSection::AutoLock autoLock(criticalSection_);

    if (framesInQueue_ > 0)
    {
        int entry = readPosition_;
        displayInfo = displayQueue_[entry];
        readPosition_ = (entry + 1) % MaximumSize;
        framesInQueue_--;
        return true;
    }

    return false;
}

#endif // HAVE_CUDA

Some files were not shown because too many files have changed in this diff