Normalize line endings and whitespace

OpenCV Buildbot
2012-10-17 11:12:04 +04:00
committed by Andrey Kamaev
parent 0442bca235
commit 81f826db2b
1511 changed files with 258678 additions and 258624 deletions


@@ -44,7 +44,7 @@ if (HAVE_CUDA)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler /wd4251)
endif()
endif()
ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})
#CUDA_BUILD_CLEAN_TARGET()

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,221 +1,221 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_COLOR_HPP__
#define __OPENCV_GPU_COLOR_HPP__
#include "detail/color_detail.hpp"
namespace cv { namespace gpu { namespace device
{
// All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implement
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
// {
// typedef ... functor_type;
// static __host__ __device__ functor_type create_functor();
// };
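// Editor's note (illustrative sketch, not part of the original header): for example,
// OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2) below generates
// bgr_to_rgb_traits, whose functor converts one packed pixel in device code:
//
//   typedef bgr_to_rgb_traits<uchar>::functor_type Cvt;
//   Cvt cvt = bgr_to_rgb_traits<uchar>::create_functor();
//   uchar3 rgb = cvt(make_uchar3(b, g, r)); // channels 0 and 2 swapped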
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
#undef OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
#undef OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
#undef OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS
OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
#undef OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
#undef OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS
OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
#undef OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 2)
#undef OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 2)
#undef OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_COLOR_HPP__


@@ -1,114 +1,114 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_COMMON_HPP__
#define __OPENCV_GPU_COMMON_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795
#endif
#ifndef CV_PI_F
#ifndef CV_PI
#define CV_PI_F 3.14159265f
#else
#define CV_PI_F ((float)CV_PI)
#endif
#endif
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#endif
namespace cv { namespace gpu
{
void error(const char *error_string, const char *file, const int line, const char *func);
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
return reinterpret_cast<size_t>(ptr) % size == 0;
}
static inline bool isAligned(size_t step, size_t size)
{
return step % size == 0;
}
}}
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
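// Editor's note (illustrative usage, not part of the original header): any CUDA
// runtime call returning cudaError_t can be wrapped so a failure is reported through
// cv::gpu::error together with file, line and (under GCC) function name:
//
//   cudaSafeCall( cudaGetLastError() );
//   cudaSafeCall( cudaDeviceSynchronize() );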
#ifdef __CUDACC__
namespace cv { namespace gpu
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
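// Editor's note (illustrative, not part of the original header): divUp rounds an
// integer division upwards, which is how launch grids are sized from image
// dimensions, e.g. for a 1000 x 480 image and 32 x 8 thread blocks:
//
//   dim3 block(32, 8);
//   dim3 grid(divUp(1000, block.x), divUp(480, block.y)); // 32 x 60 blocks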
namespace device
{
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
typedef unsigned int uint;
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
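// Editor's note (illustrative sketch, not part of the original header): a .cu file
// would declare a texture reference at file scope and bind a device image to it with
// the helper above; tex_src and src below are made-up names:
//
//   texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src;
//   ...
//   bindTexture(&tex_src, src); // src is a PtrStepSz<uchar> pointing to device memory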
}
}}
#endif // __CUDACC__
#endif // __OPENCV_GPU_COMMON_HPP__


@@ -1,105 +1,105 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
// for Fermi, memory space is detected automatically
template <typename T> struct ForceGlob
{
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
};
#else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define OPENCV_GPU_ASM_PTR "l"
#else
// 32-bit register modifier for inlined asm
#define OPENCV_GPU_ASM_PTR "r"
#endif
template<class T> struct ForceGlob;
#define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
} \
};
#define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
} \
};
OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar, u8)
OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar, s8)
OPENCV_GPU_DEFINE_FORCE_GLOB_B(char, b8)
OPENCV_GPU_DEFINE_FORCE_GLOB (ushort, u16, h)
OPENCV_GPU_DEFINE_FORCE_GLOB (short, s16, h)
OPENCV_GPU_DEFINE_FORCE_GLOB (uint, u32, r)
OPENCV_GPU_DEFINE_FORCE_GLOB (int, s32, r)
OPENCV_GPU_DEFINE_FORCE_GLOB (float, f32, f)
OPENCV_GPU_DEFINE_FORCE_GLOB (double, f64, d)
#undef OPENCV_GPU_DEFINE_FORCE_GLOB
#undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
#undef OPENCV_GPU_ASM_PTR
#endif // __CUDA_ARCH__ >= 200
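// Editor's note (illustrative, not part of the original header): both branches above
// expose the same interface, so device code can force a load from global memory in an
// architecture-independent way:
//
//   float val;
//   ForceGlob<float>::Load(row_ptr, x, val); // val = row_ptr[x], issued as ld.global on sm_1x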
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__

File diff suppressed because it is too large


@@ -1,395 +1,395 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
#define __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
#include "../common.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace transform_detail
{
//! Read Write Traits
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
typedef typename TypeVec<T, shift>::vec_type read_type;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
typedef typename TypeVec<T1, shift>::vec_type read_type1;
typedef typename TypeVec<T2, shift>::vec_type read_type2;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
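// Editor's note (illustrative, not part of the original header): these traits turn
// per-element accesses into vectorized ones. Assuming the usual TypeVec mapping,
// T = uchar with shift = 4 gives
//
//   typedef UnaryReadWriteTraits<uchar, uchar, 4>::read_type read4; // uchar4
//
// so the unrolled kernels below read and write four consecutive pixels per transaction.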
//! Transform kernels
template <int shift> struct OpUnroller;
template <> struct OpUnroller<1>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
}
};
template <> struct OpUnroller<2>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
}
};
template <> struct OpUnroller<3>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
};
template <> struct OpUnroller<4>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
template <> struct OpUnroller<8>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src.a7);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src1.a0, src2.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src1.a1, src2.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src1.a2, src2.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src1.a3, src2.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src1.a4, src2.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src1.a5, src2.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src1.a6, src2.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src1.a7, src2.a7);
}
};
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
typedef TransformFunctorTraits<UnOp> ft;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src_.cols)
{
const read_type src_n_el = ((const read_type*)src)[x];
write_type dst_n_el = ((const write_type*)dst)[x];
OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
dst.ptr(y)[x] = op(src.ptr(y)[x]);
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
const Mask mask, const BinOp op)
{
typedef TransformFunctorTraits<BinOp> ft;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
{
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
write_type dst_n_el = ((const write_type*)dst)[x];
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
const Mask mask, const BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
const T1 src1_data = src1.ptr(y)[x];
const T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
template <bool UseSmart> struct TransformDispatcher;
template<> struct TransformDispatcher<false>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
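// Editor's note (illustrative sketch, not part of the original header; MyUnaryOp and
// MyMask are made-up names): a caller with an element-wise functor and a mask functor
// exposing operator()(int y, int x) would launch the vectorized path as
//
//   TransformDispatcher<true>::call(src, dst, MyUnaryOp(), MyMask(), stream);
//
// and the alignment check above quietly falls back to TransformDispatcher<false>
// (one element per thread) whenever src/dst data or step are not multiples of
// ft::smart_shift * sizeof(element).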
} // namespace transform_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
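The dispatcher above picks between two kernels: transformSmart reads and writes ft::smart_shift elements per thread through vectorized loads, while transformSimple handles one element per thread, and the smart path is taken only when the data pointers and strides are aligned to the vector width. A minimal standalone sketch of the same idea, assuming a hypothetical uchar negation op outside OpenCV's traits machinery (names are illustrative only):

// Sketch only: mirrors the aligned/vectorized vs. element-wise dispatch above.
__global__ void negate_simple(const unsigned char* src, unsigned char* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = (unsigned char)(255 - src[i]);
}
__global__ void negate_vec4(const uchar4* src, uchar4* dst, int n4)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n4)
    {
        const uchar4 v = src[i]; // one 4-byte load instead of four 1-byte loads
        dst[i] = make_uchar4(255 - v.x, 255 - v.y, 255 - v.z, 255 - v.w);
    }
}
void negate(const unsigned char* src, unsigned char* dst, int n, cudaStream_t stream)
{
    const int threads = 256;
    const bool aligned = ((size_t)src % sizeof(uchar4) == 0) && ((size_t)dst % sizeof(uchar4) == 0) && (n % 4 == 0);
    if (aligned)
        negate_vec4<<<(n / 4 + threads - 1) / threads, threads, 0, stream>>>((const uchar4*)src, (uchar4*)dst, n / 4);
    else
        negate_simple<<<(n + threads - 1) / threads, threads, 0, stream>>>(src, dst, n);
}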

View File

@@ -1,187 +1,187 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
#define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
#include "../common.hpp"
#include "../vec_traits.hpp"
namespace cv { namespace gpu { namespace device
{
namespace type_traits_detail
{
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
template <> struct IsIntegral<char> { enum {value = 1}; };
template <> struct IsIntegral<bool> { enum {value = 1}; };
template <typename T> struct IsFloat { enum {value = 0}; };
template <> struct IsFloat<float> { enum {value = 1}; };
template <> struct IsFloat<double> { enum {value = 1}; };
template <typename T> struct IsVec { enum {value = 0}; };
template <> struct IsVec<uchar1> { enum {value = 1}; };
template <> struct IsVec<uchar2> { enum {value = 1}; };
template <> struct IsVec<uchar3> { enum {value = 1}; };
template <> struct IsVec<uchar4> { enum {value = 1}; };
template <> struct IsVec<uchar8> { enum {value = 1}; };
template <> struct IsVec<char1> { enum {value = 1}; };
template <> struct IsVec<char2> { enum {value = 1}; };
template <> struct IsVec<char3> { enum {value = 1}; };
template <> struct IsVec<char4> { enum {value = 1}; };
template <> struct IsVec<char8> { enum {value = 1}; };
template <> struct IsVec<ushort1> { enum {value = 1}; };
template <> struct IsVec<ushort2> { enum {value = 1}; };
template <> struct IsVec<ushort3> { enum {value = 1}; };
template <> struct IsVec<ushort4> { enum {value = 1}; };
template <> struct IsVec<ushort8> { enum {value = 1}; };
template <> struct IsVec<short1> { enum {value = 1}; };
template <> struct IsVec<short2> { enum {value = 1}; };
template <> struct IsVec<short3> { enum {value = 1}; };
template <> struct IsVec<short4> { enum {value = 1}; };
template <> struct IsVec<short8> { enum {value = 1}; };
template <> struct IsVec<uint1> { enum {value = 1}; };
template <> struct IsVec<uint2> { enum {value = 1}; };
template <> struct IsVec<uint3> { enum {value = 1}; };
template <> struct IsVec<uint4> { enum {value = 1}; };
template <> struct IsVec<uint8> { enum {value = 1}; };
template <> struct IsVec<int1> { enum {value = 1}; };
template <> struct IsVec<int2> { enum {value = 1}; };
template <> struct IsVec<int3> { enum {value = 1}; };
template <> struct IsVec<int4> { enum {value = 1}; };
template <> struct IsVec<int8> { enum {value = 1}; };
template <> struct IsVec<float1> { enum {value = 1}; };
template <> struct IsVec<float2> { enum {value = 1}; };
template <> struct IsVec<float3> { enum {value = 1}; };
template <> struct IsVec<float4> { enum {value = 1}; };
template <> struct IsVec<float8> { enum {value = 1}; };
template <> struct IsVec<double1> { enum {value = 1}; };
template <> struct IsVec<double2> { enum {value = 1}; };
template <> struct IsVec<double3> { enum {value = 1}; };
template <> struct IsVec<double4> { enum {value = 1}; };
template <> struct IsVec<double8> { enum {value = 1}; };
template <class U> struct AddParameterType { typedef const U& type; };
template <class U> struct AddParameterType<U&> { typedef U& type; };
template <> struct AddParameterType<void> { typedef void type; };
template <class U> struct ReferenceTraits
{
enum { value = false };
typedef U type;
};
template <class U> struct ReferenceTraits<U&>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits
{
enum { value = false };
typedef void type;
};
template <class U> struct PointerTraits<U*>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits<U*&>
{
enum { value = true };
typedef U type;
};
template <class U> struct UnConst
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnConst<const U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnConst<const U&>
{
typedef U& type;
enum { value = 1 };
};
template <class U> struct UnVolatile
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnVolatile<volatile U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnVolatile<volatile U&>
{
typedef U& type;
enum { value = 1 };
};
} // namespace type_traits_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
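Select and the Is* predicates are pre-C++11 stand-ins for std::conditional and std::is_integral, evaluated entirely at compile time. A small usage sketch, assuming a hypothetical Accumulator trait built on top of them:

// Pick an accumulator type at compile time: integral element types accumulate
// in int, floating-point element types accumulate in float.
template <typename T> struct Accumulator
{
    typedef typename cv::gpu::device::type_traits_detail::Select<
        cv::gpu::device::type_traits_detail::IsIntegral<T>::value, int, float>::type type;
};
// Accumulator<unsigned char>::type is int; Accumulator<float>::type is float.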

View File

@@ -1,117 +1,117 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
#include "../datamov_utils.hpp"
namespace cv { namespace gpu { namespace device
{
namespace vec_distance_detail
{
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
template <typename Dist, typename T1, typename T2>
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
{
if (ind < len)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, ind, val2);
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
}
}
template <typename Dist, typename T1, typename T2>
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, 0, val2);
vecGlob += THREAD_DIM;
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
}
};
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
{
}
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
{
}
};
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
}
};
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
}
};
} // namespace vec_distance_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
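UnrollVecDiffCached peels a strided reduction loop at compile time: each recursion level consumes one cached element and one global element THREAD_DIM positions further on, and the <THREAD_DIM, 0> specialization terminates the recursion. The same pattern, stripped of the Dist functor and CUDA qualifiers, looks like this (plain C++ sketch, names illustrative):

// Sums v[ind], v[ind + STRIDE], ... for at most N terms, unrolled at compile time.
template <int STRIDE, int N> struct UnrollSum
{
    static float run(const float* v, int len, int ind)
    {
        if (ind >= len)
            return 0.f;
        return v[ind] + UnrollSum<STRIDE, N - 1>::run(v, len, ind + STRIDE);
    }
};
template <int STRIDE> struct UnrollSum<STRIDE, 0>
{
    static float run(const float*, int, int) { return 0.f; }
};
// UnrollSum<32, 4>::run(data, len, tid) corresponds to calcCheck with THREAD_DIM = 32 and N = 4.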

View File

@@ -1,80 +1,80 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
namespace cv { namespace gpu { namespace device
{
template<class T> struct DynamicSharedMem
{
__device__ __forceinline__ operator T*()
{
extern __shared__ int __smem[];
return (T*)__smem;
}
__device__ __forceinline__ operator const T*() const
{
extern __shared__ int __smem[];
return (T*)__smem;
}
};
// specialize for double to avoid unaligned memory access compile errors
template<> struct DynamicSharedMem<double>
{
__device__ __forceinline__ operator double*()
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
__device__ __forceinline__ operator const double*() const
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
};
}}}
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
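DynamicSharedMem exists because every extern __shared__ declaration in a kernel refers to the same dynamic allocation, so templated kernels need a single shared symbol reinterpreted per element type (with a separately named double symbol to keep alignment, as the specialization above notes). A minimal usage sketch, assuming a hypothetical kernel launched with n * sizeof(float) bytes of dynamic shared memory:

__global__ void reverse_block(const float* in, float* out, int n)
{
    cv::gpu::device::DynamicSharedMem<float> shared;
    float* buf = shared;            // implicit conversion to float* over the dynamic allocation
    const int i = threadIdx.x;
    if (i < n)
        buf[i] = in[i];
    __syncthreads();
    if (i < n)
        out[i] = buf[n - 1 - i];
}
// launch: reverse_block<<<1, n, n * sizeof(float)>>>(d_in, d_out, n);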

View File

@@ -1,278 +1,278 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_FILTERS_HPP__
#define __OPENCV_GPU_FILTERS_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
#include "type_traits.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename Ptr2D> struct PointFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
return src(__float2int_rz(y), __float2int_rz(x));
}
const Ptr2D src;
};
template <typename Ptr2D> struct LinearFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(x);
const int y1 = __float2int_rd(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
elem_type src_reg = src(y1, x1);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = src(y1, x2);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = src(y2, x1);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = src(y2, x2);
out = out + src_reg * ((x - x1) * (y - y1));
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
};
template <typename Ptr2D> struct CubicFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
float x = fabsf(x_);
if (x <= 1.0f)
{
return x * x * (1.5f * x - 2.5f) + 1.0f;
}
else if (x < 2.0f)
{
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
}
else
{
return 0.0f;
}
}
__device__ elem_type operator ()(float y, float x) const
{
const float xmin = ::ceilf(x - 2.0f);
const float xmax = ::floorf(x + 2.0f);
const float ymin = ::ceilf(y - 2.0f);
const float ymax = ::floorf(y + 2.0f);
work_type sum = VecTraits<work_type>::all(0);
float wsum = 0.0f;
for (float cy = ymin; cy <= ymax; cy += 1.0f)
{
for (float cx = xmin; cx <= xmax; cx += 1.0f)
{
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
wsum += w;
}
}
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
return saturate_cast<elem_type>(res);
}
const Ptr2D src;
};
// for integer scaling
template <typename Ptr2D> struct IntegerAreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for(int dy = sy1; dy < sy2; ++dy)
for(int dx = sx1; dx < sx2; ++dx)
{
out = out + src(dy, dx) * scale;
}
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
        float scale_x, scale_y, scale;
};
template <typename Ptr2D> struct AreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for (int dy = sy1; dy < sy2; ++dy)
{
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(dy, dx) * scale;
if (sx1 > fsx1)
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
if (sx2 < fsx2)
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
}
if (sy1 > fsy1)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
if (sy2 < fsy2)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
if ((sy1 > fsy1) && (sx1 > fsx1))
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
if ((sy1 > fsy1) && (sx2 < fsx2))
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx2 < fsx2))
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx1 > fsx1))
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
float scale_x, scale_y;
        int width, height;
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_FILTERS_HPP__
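LinearFilter is plain bilinear interpolation: the four weights (x2 - x)(y2 - y), (x - x1)(y2 - y), (x2 - x)(y - y1) and (x - x1)(y - y1) sum to 1 because x2 - x1 = y2 - y1 = 1. A host-side sketch of the same weighting, assuming a hypothetical row-major float image and ignoring border handling:

#include <math.h>
float bilinear(const float* img, int cols, float y, float x)
{
    const int x1 = (int)floorf(x), y1 = (int)floorf(y);
    const int x2 = x1 + 1,         y2 = y1 + 1;
    return img[y1 * cols + x1] * ((x2 - x) * (y2 - y))
         + img[y1 * cols + x2] * ((x - x1) * (y2 - y))
         + img[y2 * cols + x1] * ((x2 - x) * (y - y1))
         + img[y2 * cols + x2] * ((x - x1) * (y - y1));
}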

View File

@@ -1,72 +1,72 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
#define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
#include <cstdio>
namespace cv { namespace gpu { namespace device
{
template<class Func>
void printFuncAttrib(Func& func)
{
cudaFuncAttributes attrs;
cudaFuncGetAttributes(&attrs, func);
printf("=== Function stats ===\n");
printf("Name: \n");
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
printf("numRegs = %d\n", attrs.numRegs);
printf("ptxVersion = %d\n", attrs.ptxVersion);
printf("binaryVersion = %d\n", attrs.binaryVersion);
printf("\n");
fflush(stdout);
}
}}} // namespace cv { namespace gpu { namespace device
#endif /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
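A minimal usage sketch for the helper above, assuming the OpenCV 2.4 gpu-module include layout; the kernel name, wrapper function and include path are illustrative only. printFuncAttrib is instantiated with a __global__ function from host code and dumps that kernel's resource usage to stdout.

#include <opencv2/gpu/device/funcattrib.hpp> // assumed 2.4-era path

__global__ void scaleKernel(const float* src, float* dst, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = 2.0f * src[i];
}

void reportScaleKernelStats()
{
    // Prints shared/constant/local memory sizes, register count and PTX/binary versions.
    cv::gpu::device::printFuncAttrib(scaleKernel);
}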

File diff suppressed because it is too large Load Diff

View File

@@ -1,235 +1,235 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__
#include <limits>
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<class T> struct numeric_limits
{
typedef T type;
__device__ __forceinline__ static type min() { return type(); };
__device__ __forceinline__ static type max() { return type(); };
__device__ __forceinline__ static type epsilon() { return type(); }
__device__ __forceinline__ static type round_error() { return type(); }
__device__ __forceinline__ static type denorm_min() { return type(); }
__device__ __forceinline__ static type infinity() { return type(); }
__device__ __forceinline__ static type quiet_NaN() { return type(); }
__device__ __forceinline__ static type signaling_NaN() { return T(); }
static const bool is_signed;
};
template<> struct numeric_limits<bool>
{
typedef bool type;
__device__ __forceinline__ static type min() { return false; };
__device__ __forceinline__ static type max() { return true; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<char>
{
typedef char type;
__device__ __forceinline__ static type min() { return CHAR_MIN; };
__device__ __forceinline__ static type max() { return CHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (char)-1 == -1;
};
template<> struct numeric_limits<signed char>
{
        typedef signed char type;
__device__ __forceinline__ static type min() { return SCHAR_MIN; };
__device__ __forceinline__ static type max() { return SCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (signed char)-1 == -1;
};
template<> struct numeric_limits<unsigned char>
{
typedef unsigned char type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<short>
{
typedef short type;
__device__ __forceinline__ static type min() { return SHRT_MIN; };
__device__ __forceinline__ static type max() { return SHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned short>
{
typedef unsigned short type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return USHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<int>
{
typedef int type;
__device__ __forceinline__ static type min() { return INT_MIN; };
__device__ __forceinline__ static type max() { return INT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned int>
{
typedef unsigned int type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UINT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<long>
{
typedef long type;
__device__ __forceinline__ static type min() { return LONG_MIN; };
__device__ __forceinline__ static type max() { return LONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned long>
{
typedef unsigned long type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return ULONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<float>
{
typedef float type;
__device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
__device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
__device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<double>
{
typedef double type;
__device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
__device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
}}} // namespace cv { namespace gpu { namespace device {
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
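A short device-side sketch of how these traits are typically used, assuming the header above is included; the helper name below is illustrative only. numeric_limits<float>::max() seeds a running minimum so the loop body needs no special case for the first element.

__device__ float threadLocalMin(const float* data, int n)
{
    // Seed with the largest representable float so any element replaces it.
    float result = cv::gpu::device::numeric_limits<float>::max();
    for (int i = threadIdx.x; i < n; i += blockDim.x)
        result = ::fminf(result, data[i]);
    return result; // per-thread partial minimum; combine across the block with a reduction
}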

View File

@@ -1,216 +1,216 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_SATURATE_CAST_HPP__
#define __OPENCV_GPU_SATURATE_CAST_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
return (uchar) ::max((int)v, 0);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
return (uchar) ::min((uint)v, (uint)UCHAR_MAX);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
return (uchar) ::min(v, (uint)UCHAR_MAX);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
        return saturate_cast<uchar>((int)v); // go through int so negative shorts clamp to 0
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<uchar>(iv);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<uchar>(iv);
#else
return saturate_cast<uchar>((float)v);
#endif
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
return (schar) ::min((int)v, SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
return (schar) ::min((uint)v, (uint)SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
return saturate_cast<schar>((int)v);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
return (schar) ::min(v, (uint)SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<schar>(iv);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<schar>(iv);
#else
return saturate_cast<schar>((float)v);
#endif
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
return (ushort) ::max((int)v, 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
return (ushort) ::max((int)v, 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
return (ushort) ::min(v, (uint)USHRT_MAX);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<ushort>(iv);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<ushort>(iv);
#else
return saturate_cast<ushort>((float)v);
#endif
}
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
return (short) ::min((int)v, SHRT_MAX);
}
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
{
return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN);
}
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
return (short) ::min(v, (uint)SHRT_MAX);
}
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<short>(iv);
}
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<short>(iv);
#else
return saturate_cast<short>((float)v);
#endif
}
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
{
return __float2int_rn(v);
}
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2int_rn(v);
#else
return saturate_cast<int>((float)v);
#endif
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
return __float2uint_rn(v);
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2uint_rn(v);
#else
return saturate_cast<uint>((float)v);
#endif
}
}}}
#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
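A hypothetical kernel sketch showing the intended use of saturate_cast: the rounded product is clamped to [0, 255] instead of wrapping. The kernel name and scaling factor are illustrative only.

__global__ void packToBytes(const float* src, unsigned char* dst, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = cv::gpu::device::saturate_cast<unsigned char>(src[i] * 255.0f); // rounds, then clamps
}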

View File

@@ -1,67 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__
#define __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__
#if defined(__CUDACC__)
#define __OPENCV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
#define __OPENCV_GPU_HOST_DEVICE__
#endif
namespace cv { namespace gpu
{
namespace device
{
template<bool expr> struct Static {};
template<> struct Static<true>
{
__OPENCV_GPU_HOST_DEVICE__ static void check() {};
};
}
}}
#undef __OPENCV_GPU_HOST_DEVICE__
#endif /* __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__ */
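A minimal sketch of how Static serves as a compile-time assertion: only Static<true> defines check(), so the call compiles only when the predicate holds. The function and predicate below are illustrative only.

template <typename T, int cn> void ensureSupportedChannelCount()
{
    // Fails to compile (no member 'check') when cn is outside [1, 4].
    cv::gpu::device::Static<(cn >= 1 && cn <= 4)>::check();
}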

View File

@@ -1,67 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "common.hpp"
#include "utility.hpp"
#include "detail/transform_detail.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
}
}}}
#endif // __OPENCV_GPU_TRANSFORM_HPP__
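A hedged host-side sketch of driving transform with a unary functor, mirroring how the gpu module's own .cu files call it; it assumes functional.hpp (for the default TransformFunctorTraits) and utility.hpp (for WithOutMask) are available, and the functor and wrapper names are illustrative only. It must be compiled by nvcc.

#include <opencv2/gpu/device/functional.hpp> // assumed: provides TransformFunctorTraits
#include <opencv2/gpu/device/utility.hpp>    // assumed: provides WithOutMask
#include <opencv2/gpu/device/transform.hpp>

struct Scale
{
    float s;
    explicit Scale(float s_) : s(s_) {}
    __device__ __forceinline__ float operator()(float v) const { return v * s; }
};

void scaleBuffer(cv::gpu::PtrStepSz<float> src, cv::gpu::PtrStepSz<float> dst, float s, cudaStream_t stream)
{
    // Unmasked, per-element application of Scale; runs asynchronously on 'stream'.
    cv::gpu::device::transform(src, dst, Scale(s), cv::gpu::device::WithOutMask(), stream);
}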

View File

@@ -1,82 +1,82 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__
#define __OPENCV_GPU_TYPE_TRAITS_HPP__
#include "detail/type_traits_detail.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T> struct IsSimpleParameter
{
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
};
template <typename T> struct TypeTraits
{
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
enum { isConst = type_traits_detail::UnConst<T>::value };
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
enum { isArith = isIntegral || isFloat };
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
};
}}}
#endif // __OPENCV_GPU_TYPE_TRAITS_HPP__
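A brief sketch of the intent behind TypeTraits<T>::ParameterType: simple arithmetic and pointer types are passed by value, anything larger through the form chosen by AddParameterType (typically a const reference). The accumulator below is illustrative only and assumes T supports operator+.

template <typename T> struct Accumulator
{
    // Picks the cheaper way to pass T into a device function.
    typedef typename cv::gpu::device::TypeTraits<T>::ParameterType param_type;

    T sum;

    __device__ __forceinline__ void add(param_type v) { sum = sum + v; }
};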

View File

@@ -1,237 +1,237 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_UTILITY_HPP__
#define __OPENCV_GPU_UTILITY_HPP__
#include "saturate_cast.hpp"
#include "datamov_utils.hpp"
#include "detail/reduction_detail.hpp"
namespace cv { namespace gpu { namespace device
{
#define OPENCV_GPU_LOG_WARP_SIZE (5)
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
///////////////////////////////////////////////////////////////////////////////
// swap
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
{
const T temp = a;
a = b;
b = temp;
}
///////////////////////////////////////////////////////////////////////////////
// Mask Reader
struct SingleMask
{
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x] != 0;
}
PtrStepb mask;
};
struct SingleMaskChannels
{
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
: mask(mask_), channels(channels_) {}
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
:mask(mask_.mask), channels(mask_.channels){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x / channels] != 0;
}
PtrStepb mask;
int channels;
};
struct MaskCollection
{
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
: maskCollection(maskCollection_) {}
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
__device__ __forceinline__ void next()
{
curMask = *maskCollection++;
}
__device__ __forceinline__ void setMask(int z)
{
curMask = maskCollection[z];
}
__device__ __forceinline__ bool operator()(int y, int x) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
}
const PtrStepb* maskCollection;
PtrStepb curMask;
};
struct WithOutMask
{
__device__ __forceinline__ WithOutMask(){}
__device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
__device__ __forceinline__ void next() const
{
}
__device__ __forceinline__ void setMask(int) const
{
}
__device__ __forceinline__ bool operator()(int, int) const
{
return true;
}
__device__ __forceinline__ bool operator()(int, int, int) const
{
return true;
}
static __device__ __forceinline__ bool check(int, int)
{
return true;
}
static __device__ __forceinline__ bool check(int, int, int, uint offset = 0)
{
return true;
}
};
///////////////////////////////////////////////////////////////////////////////
// Reduction
template <int n, typename T, typename Op> __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
StaticAssert<n >= 8 && n <= 512>::check();
utility_detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
}
template <int n, typename T, typename V, typename Pred>
__device__ __forceinline__ void reducePredVal(volatile T* sdata, T& myData, V* sval, V& myVal, int tid, const Pred& pred)
{
StaticAssert<n >= 8 && n <= 512>::check();
utility_detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
}
template <int n, typename T, typename V1, typename V2, typename Pred>
__device__ __forceinline__ void reducePredVal2(volatile T* sdata, T& myData, V1* sval1, V1& myVal1, V2* sval2, V2& myVal2, int tid, const Pred& pred)
{
StaticAssert<n >= 8 && n <= 512>::check();
utility_detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
}
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
// solve 2x2 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
return true;
}
return false;
}
// solve 3x3 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet *
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
x[1] = saturate_cast<T>(invdet *
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
x[2] = saturate_cast<T>(invdet *
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
return true;
}
return false;
}
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_UTILITY_HPP__
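Below is a hedged usage sketch, not part of the commit, showing how the reduce<> primitive and the SingleMask reader above are typically combined: every thread accumulates a strided partial sum over one row, and a single block-wide reduction produces the row total. The kernel name, the block size, and the functional.hpp include for plus<> are assumptions for illustration.

#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"

namespace sketch
{
    using namespace cv::gpu::device;

    // One block per row: sum the unmasked pixels of row blockIdx.x.
    // BLOCK_SIZE must satisfy the 8..512 StaticAssert inside reduce<>.
    template <int BLOCK_SIZE>
    __global__ void maskedRowSum(PtrStepb src, SingleMask mask, int cols, int* rowSums)
    {
        __shared__ int smem[BLOCK_SIZE];

        int partial = 0;
        for (int x = threadIdx.x; x < cols; x += BLOCK_SIZE)
            if (mask(blockIdx.x, x))
                partial += src.ptr(blockIdx.x)[x];

        // After the call, thread 0 holds the block-wide sum in 'partial'.
        reduce<BLOCK_SIZE>(smem, partial, threadIdx.x, plus<volatile int>());

        if (threadIdx.x == 0)
            rowSums[blockIdx.x] = partial;
    }
}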

@@ -1,224 +1,224 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_HPP__
#include "utility.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T> struct L1Dist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ L1Dist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
template <> struct L1Dist<float>
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
mySum += ::fabs(val1 - val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
}
__device__ __forceinline__ operator float() const
{
return mySum;
}
float mySum;
};
struct L2Dist
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
}
__device__ __forceinline__ operator float() const
{
return sqrtf(mySum);
}
float mySum;
};
struct HammingDist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ HammingDist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
for (int i = tid; i < len; i += THREAD_DIM)
{
T1 val1;
ForceGlob<T1>::Load(vec1, i, val1);
T2 val2;
ForceGlob<T2>::Load(vec2, i, val2);
dist.reduceIter(val1, val2);
}
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
{
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
{
vec1 = vec1_;
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
}
const T1* vec1;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
{
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
{
if (glob_tid < len)
smem[glob_tid] = vec1[glob_tid];
__syncthreads();
U* vec1ValsPtr = vec1Vals;
#pragma unroll
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
*vec1ValsPtr++ = smem[i];
__syncthreads();
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
}
U vec1Vals[MAX_LEN / THREAD_DIM];
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
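A hedged sketch, not part of the commit, of the intended use of L2Dist together with calcVecDiffGlobal above: one block computes the distance between one query descriptor and one train descriptor, with THREAD_DIM threads striding over the elements and the partial sums reduced in shared memory. The kernel and parameter names are assumptions; PtrStepf is the float step pointer from the GPU module.

#include "opencv2/gpu/device/vec_distance.hpp"

namespace sketch
{
    using namespace cv::gpu::device;

    // blockIdx.x selects the query row, blockIdx.y the train row.
    // THREAD_DIM must satisfy the 8..512 constraint of the underlying reduce<>.
    template <int THREAD_DIM>
    __global__ void pairwiseL2(PtrStepf query, PtrStepf train, int len, PtrStepf dists)
    {
        __shared__ float smem[THREAD_DIM];

        L2Dist dist;
        calcVecDiffGlobal<THREAD_DIM>(query.ptr(blockIdx.x), train.ptr(blockIdx.y), len, dist, smem, threadIdx.x);

        if (threadIdx.x == 0)
            dists.ptr(blockIdx.y)[blockIdx.x] = dist; // operator float() applies the final sqrtf
    }
}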

@@ -1,330 +1,330 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace vec_math_detail
{
template <int cn, typename VecD> struct SatCastHelper;
template <typename VecD> struct SatCastHelper<1, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
}
};
template <typename VecD> struct SatCastHelper<2, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
}
};
template <typename VecD> struct SatCastHelper<3, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
}
};
template <typename VecD> struct SatCastHelper<4, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
};
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v)
{
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
__device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
}
namespace vec_math_detail
{
template <typename T1, typename T2> struct BinOpTraits
{
typedef int argument_type;
};
template <typename T> struct BinOpTraits<T, T>
{
typedef T argument_type;
};
template <typename T> struct BinOpTraits<T, double>
{
typedef double argument_type;
};
template <typename T> struct BinOpTraits<double, T>
{
typedef double argument_type;
};
template <> struct BinOpTraits<double, double>
{
typedef double argument_type;
};
template <typename T> struct BinOpTraits<T, float>
{
typedef float argument_type;
};
template <typename T> struct BinOpTraits<float, T>
{
typedef float argument_type;
};
template <> struct BinOpTraits<float, float>
{
typedef float argument_type;
};
template <> struct BinOpTraits<double, float>
{
typedef double argument_type;
};
template <> struct BinOpTraits<float, double>
{
typedef double argument_type;
};
}
#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
__device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<T, type>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
}
#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, fabs, fabs_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)
#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
OPENCV_GPU_IMPLEMENT_VEC_OP(float)
OPENCV_GPU_IMPLEMENT_VEC_OP(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
#undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
#undef OPENCV_GPU_IMPLEMENT_VEC_OP
#undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VECMATH_HPP__
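A hedged sketch of what the generated element-wise vector operators and the vector saturate_cast above enable: uchar3 pixels promoted to float3 arithmetic and clamped back per channel. The blend helper and its weighting are illustrative only, not part of the library.

#include "opencv2/gpu/device/vec_math.hpp"

namespace sketch
{
    using namespace cv::gpu::device;

    // operator*(uchar3, float) and operator+(float3, float3) come from the
    // OPENCV_GPU_IMPLEMENT_VEC_OP expansions above; saturate_cast<uchar3>
    // clamps each channel back to [0, 255].
    __device__ __forceinline__ uchar3 blend(const uchar3& a, const uchar3& b, float alpha)
    {
        float3 mixed = a * alpha + b * (1.0f - alpha);
        return saturate_cast<uchar3>(mixed);
    }
}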
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace vec_math_detail
{
template <int cn, typename VecD> struct SatCastHelper;
template <typename VecD> struct SatCastHelper<1, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
}
};
template <typename VecD> struct SatCastHelper<2, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
}
};
template <typename VecD> struct SatCastHelper<3, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
}
};
template <typename VecD> struct SatCastHelper<4, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
};
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v)
{
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
__device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
}
namespace vec_math_detail
{
template <typename T1, typename T2> struct BinOpTraits
{
typedef int argument_type;
};
template <typename T> struct BinOpTraits<T, T>
{
typedef T argument_type;
};
template <typename T> struct BinOpTraits<T, double>
{
typedef double argument_type;
};
template <typename T> struct BinOpTraits<double, T>
{
typedef double argument_type;
};
template <> struct BinOpTraits<double, double>
{
typedef double argument_type;
};
template <typename T> struct BinOpTraits<T, float>
{
typedef float argument_type;
};
template <typename T> struct BinOpTraits<float, T>
{
typedef float argument_type;
};
template <> struct BinOpTraits<float, float>
{
typedef float argument_type;
};
template <> struct BinOpTraits<double, float>
{
typedef double argument_type;
};
template <> struct BinOpTraits<float, double>
{
typedef double argument_type;
};
}
#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
__device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
}
#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, fabs, fabs_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)
#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
OPENCV_GPU_IMPLEMENT_VEC_OP(float)
OPENCV_GPU_IMPLEMENT_VEC_OP(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
#undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
#undef OPENCV_GPU_IMPLEMENT_VEC_OP
#undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VECMATH_HPP__
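// Usage sketch (illustrative only, not part of this commit): the macros above generate
// element-wise operators and math functions for the CUDA built-in vector types, covering
// vector/vector and mixed vector/scalar operands. The kernel name below is hypothetical;
// the include path assumes the usual installed layout of these device headers.
#include "opencv2/gpu/device/vec_math.hpp"

__global__ void blend_demo(const float3* a, const float3* b, float3* dst, int n)
{
    using namespace cv::gpu::device;            // brings the generated operators into scope
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = a[i] * 0.5f + b[i] * 0.5f;     // float3 * float and float3 + float3
}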

View File

@@ -1,280 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__
#define __OPENCV_GPU_VEC_TRAITS_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<typename T, int N> struct TypeVec;
struct __align__(8) uchar8
{
uchar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
{
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(8) char8
{
schar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
{
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) ushort8
{
ushort a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
{
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) short8
{
short a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
{
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) uint8
{
uint a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
{
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) int8
{
int a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) float8
{
float a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct double8
{
double a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)
#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
template<typename T> struct VecTraits;
#define OPENCV_GPU_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
typedef type elem_type; \
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
template<> struct VecTraits<char>
{
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
typedef schar elem_type;
enum {cn=2};
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
typedef schar elem_type;
enum {cn=3};
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
typedef schar elem_type;
enum {cn=4};
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__
#define __OPENCV_GPU_VEC_TRAITS_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<typename T, int N> struct TypeVec;
struct __align__(8) uchar8
{
uchar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
{
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(8) char8
{
schar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
{
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) ushort8
{
ushort a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
{
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) short8
{
short a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
{
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) uint8
{
uint a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
{
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) int8
{
int a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) float8
{
float a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct double8
{
double a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)
#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
template<typename T> struct VecTraits;
#define OPENCV_GPU_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
typedef type elem_type; \
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
template<> struct VecTraits<char>
{
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
typedef schar elem_type;
enum {cn=2};
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
typedef schar elem_type;
enum {cn=3};
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
typedef schar elem_type;
enum {cn=4};
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
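// Usage sketch (illustrative only, not part of this commit): TypeVec and VecTraits let
// templated device code build and broadcast vectors generically. The helper below is
// hypothetical and assumes the usual installed header path.
#include "opencv2/gpu/device/vec_traits.hpp"

template <typename T>
__device__ typename cv::gpu::device::TypeVec<T, 4>::vec_type splat4(T v)
{
    typedef typename cv::gpu::device::TypeVec<T, 4>::vec_type vec4;   // e.g. uchar -> uchar4
    return cv::gpu::device::VecTraits<vec4>::all(v);                  // e.g. make_uchar4(v, v, v, v)
}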

View File

@@ -1,112 +1,112 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
#define __OPENCV_GPU_DEVICE_WARP_HPP__
namespace cv { namespace gpu { namespace device
{
struct Warp
{
enum
{
LOG_WARP_SIZE = 5,
WARP_SIZE = 1 << LOG_WARP_SIZE,
STRIDE = WARP_SIZE
};
/** \brief Returns the warp lane ID of the calling thread. */
static __device__ __forceinline__ unsigned int laneId()
{
unsigned int ret;
asm("mov.u32 %0, %laneid;" : "=r"(ret) );
return ret;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
for(It t = beg + laneId(); t < end; t += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = *t;
return out;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = op(*t);
return out;
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
unsigned int lane = laneId();
InIt1 t1 = beg1 + lane;
InIt2 t2 = beg2 + lane;
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
*out = op(*t1, *t2);
return out;
}
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
unsigned int lane = laneId();
value += lane;
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
};
}}} // namespace cv { namespace gpu { namespace device
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
#define __OPENCV_GPU_DEVICE_WARP_HPP__
namespace cv { namespace gpu { namespace device
{
struct Warp
{
enum
{
LOG_WARP_SIZE = 5,
WARP_SIZE = 1 << LOG_WARP_SIZE,
STRIDE = WARP_SIZE
};
/** \brief Returns the warp lane ID of the calling thread. */
static __device__ __forceinline__ unsigned int laneId()
{
unsigned int ret;
asm("mov.u32 %0, %laneid;" : "=r"(ret) );
return ret;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
for(It t = beg + laneId(); t < end; t += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = *t;
return out;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = op(*t);
return out;
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
unsigned int lane = laneId();
InIt1 t1 = beg1 + lane;
InIt2 t2 = beg2 + lane;
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
*out = op(*t1, *t2);
return out;
}
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
unsigned int lane = laneId();
value += lane;
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
};
}}} // namespace cv { namespace gpu { namespace device
#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
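// Usage sketch (illustrative only, not part of this commit): the Warp helpers above run
// warp-cooperative loops in which each lane handles elements laneId(), laneId() + 32, ...
// The kernel below is hypothetical and assumes one warp per block plus the usual
// installed header path.
#include "opencv2/gpu/device/warp.hpp"

__global__ void warp_init_demo(int* buf, int n)
{
    using cv::gpu::device::Warp;
    Warp::fill(buf, buf + n, 0);   // every lane writes its strided subset of [buf, buf + n)
    Warp::yota(buf, buf + n, 1);   // buf[i] = 1 + i (warp-wide iota, spelled "yota" in this header)
}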

View File

@@ -1,69 +1,69 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__
namespace cv { namespace gpu { namespace device
{
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16)
{
T partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
}}} // namespace cv { namespace gpu { namespace device {
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__
namespace cv { namespace gpu { namespace device
{
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16)
{
T partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
}}} // namespace cv { namespace gpu { namespace device {
#endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */
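// Usage sketch (illustrative only, not part of this commit): warp_reduce() above sums the
// 32 shared-memory slots owned by one warp with a five-step tree and returns the value in
// the warp-base slot. The kernel below is hypothetical, launches one warp per block, and
// assumes the usual installed header path.
#include "opencv2/gpu/device/warp_reduce.hpp"

__global__ void warp_sum_demo(const float* src, float* sums)
{
    __shared__ volatile float smem[32];
    const unsigned int tid = threadIdx.x;                        // lane index; the block is a single warp
    smem[tid] = src[blockIdx.x * 32 + tid];
    const float sum = cv::gpu::device::warp_reduce(smem, tid);   // relies on warp-synchronous execution
    if (tid == 0)
        sums[blockIdx.x] = sum;
}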

View File

@@ -1,43 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda_devptrs.hpp"
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda_devptrs.hpp"

File diff suppressed because it is too large Load Diff

View File

@@ -1,43 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/gpumat.hpp"
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/gpumat.hpp"

View File

@@ -1,64 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#include "opencv2/gpu/gpu.hpp"
#include "cuda_runtime_api.h"
namespace cv
{
namespace gpu
{
// This is the only header file that depends on CUDA; all other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you can obtain the CUDA stream instance via the class below.
// In this case you have to install the CUDA Toolkit.
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
};
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#include "opencv2/gpu/gpu.hpp"
#include "cuda_runtime_api.h"
namespace cv
{
namespace gpu
{
// This is the only header file that depends on CUDA; all other headers are independent.
// So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
// But if you want to use the GPU yourself, you can obtain the CUDA stream instance via the class below.
// In this case you have to install the CUDA Toolkit.
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
};
}
}
#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
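// Usage sketch (illustrative only, not part of this commit): StreamAccessor exposes the raw
// cudaStream_t behind cv::gpu::Stream so user code can queue its own CUDA work on the same
// stream. The function and kernel names below are hypothetical.
#include "opencv2/gpu/stream_accessor.hpp"

void run_on_opencv_stream(cv::gpu::Stream& stream)
{
    cudaStream_t s = cv::gpu::StreamAccessor::getStream(stream);
    // my_kernel<<<grid, block, 0, s>>>(...);   // user-provided kernel, launched on the same stream
    cudaStreamSynchronize(s);                   // block until all work queued on the stream has finished
}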

0
modules/gpu/misc/mark_nvidia.py Normal file → Executable file
View File

View File

@@ -1,381 +1,381 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// StereoBM
typedef std::tr1::tuple<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
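// The tests below share one pattern: load the inputs, then, when PERF_RUN_GPU() is set,
// time the GPU path with TEST_CYCLE() and validate it with GPU_SANITY_CHECK(); otherwise
// run the CPU analogue under CPU_SANITY_CHECK(), or FAIL() when no CPU analogue exists.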
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
{
declare.time(5.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int preset = 0;
const int ndisp = 256;
if (PERF_RUN_GPU())
{
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bm(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_bm(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::StereoBM bm(preset, ndisp);
cv::Mat dst;
bm(imgLeft, imgRight, dst);
TEST_CYCLE()
{
bm(imgLeft, imgRight, dst);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
declare.time(10.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0));
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1));
ASSERT_FALSE(imgRight.empty());
const int ndisp = 64;
if (PERF_RUN_GPU())
{
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_bp(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
declare.time(10.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int ndisp = 128;
if (PERF_RUN_GPU())
{
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_csbp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_csbp(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// DisparityBilateralFilter
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
{
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const cv::Mat disp = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp.empty());
const int ndisp = 128;
if (PERF_RUN_GPU())
{
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat d_dst;
d_filter(d_disp, d_img, d_dst);
TEST_CYCLE()
{
d_filter(d_disp, d_img, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// TransformPoints
DEF_PARAM_TEST_1(Count, int);
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
TEST_CYCLE()
{
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// ProjectPoints
PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
TEST_CYCLE()
{
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
TEST_CYCLE()
{
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
{
declare.time(10.0);
const int count = GetParam();
cv::Mat object(1, count, CV_32FC3);
fillRandom(object, -100, 100);
cv::Mat camera_mat(3, 3, CV_32FC1);
fillRandom(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold(1, 3, CV_32FC1);
fillRandom(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
fillRandom(tvec_gold, 0, 1);
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
cv::Mat rvec;
cv::Mat tvec;
if (PERF_RUN_GPU())
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
else
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
CPU_SANITY_CHECK(rvec);
CPU_SANITY_CHECK(tvec);
}
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
cv::Mat src(size, depth);
fillRandom(src, 5.0, 30.0);
cv::Mat Q(4, 4, CV_32FC1);
fillRandom(Q, 0.1, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
TEST_CYCLE()
{
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::reprojectImageTo3D(src, dst, Q);
TEST_CYCLE()
{
cv::reprojectImageTo3D(src, dst, Q);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// DrawColorDisp
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src, 0, 255);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::drawColorDisp(d_src, d_dst, 255);
TEST_CYCLE()
{
cv::gpu::drawColorDisp(d_src, d_dst, 255);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
} // namespace
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// StereoBM
typedef std::tr1::tuple<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
PERF_TEST_P(ImagePair, Calib3D_StereoBM, Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
{
declare.time(5.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int preset = 0;
const int ndisp = 256;
if (PERF_RUN_GPU())
{
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bm(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_bm(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::StereoBM bm(preset, ndisp);
cv::Mat dst;
bm(imgLeft, imgRight, dst);
TEST_CYCLE()
{
bm(imgLeft, imgRight, dst);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation, Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
declare.time(10.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0));
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1));
ASSERT_FALSE(imgRight.empty());
const int ndisp = 64;
if (PERF_RUN_GPU())
{
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_bp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_bp(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
declare.time(10.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int ndisp = 128;
if (PERF_RUN_GPU())
{
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
cv::gpu::GpuMat d_imgLeft(imgLeft);
cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat d_dst;
d_csbp(d_imgLeft, d_imgRight, d_dst);
TEST_CYCLE()
{
d_csbp(d_imgLeft, d_imgRight, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// DisparityBilateralFilter
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter, Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
{
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const cv::Mat disp = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp.empty());
const int ndisp = 128;
if (PERF_RUN_GPU())
{
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat d_dst;
d_filter(d_disp, d_img, d_dst);
TEST_CYCLE()
{
d_filter(d_disp, d_img, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// TransformPoints
DEF_PARAM_TEST_1(Count, int);
PERF_TEST_P(Count, Calib3D_TransformPoints, Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
TEST_CYCLE()
{
cv::gpu::transformPoints(d_src, rvec, tvec, d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
//////////////////////////////////////////////////////////////////////
// ProjectPoints
PERF_TEST_P(Count, Calib3D_ProjectPoints, Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
fillRandom(src, -100, 100);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
TEST_CYCLE()
{
cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), d_dst);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
TEST_CYCLE()
{
cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
PERF_TEST_P(Count, Calib3D_SolvePnPRansac, Values(5000, 10000, 20000))
{
declare.time(10.0);
const int count = GetParam();
cv::Mat object(1, count, CV_32FC3);
fillRandom(object, -100, 100);
cv::Mat camera_mat(3, 3, CV_32FC1);
fillRandom(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold(1, 3, CV_32FC1);
fillRandom(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
fillRandom(tvec_gold, 0, 1);
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
cv::Mat rvec;
cv::Mat tvec;
if (PERF_RUN_GPU())
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
else
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
TEST_CYCLE()
{
cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
}
}
CPU_SANITY_CHECK(rvec);
CPU_SANITY_CHECK(tvec);
}
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
cv::Mat src(size, depth);
fillRandom(src, 5.0, 30.0);
cv::Mat Q(4, 4, CV_32FC1);
fillRandom(Q, 0.1, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
TEST_CYCLE()
{
cv::gpu::reprojectImageTo3D(d_src, d_dst, Q);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::reprojectImageTo3D(src, dst, Q);
TEST_CYCLE()
{
cv::reprojectImageTo3D(src, dst, Q);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// DrawColorDisp
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src, 0, 255);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::drawColorDisp(d_src, d_dst, 255);
TEST_CYCLE()
{
cv::gpu::drawColorDisp(d_src, d_dst, 255);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
FAIL() << "No such CPU implementation analogy.";
}
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@@ -1,309 +1,309 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SURF
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.png"))
{
declare.time(50.0);
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::SURF_GPU d_surf;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
GPU_SANITY_CHECK(d_descriptors, 1e-4);
GPU_SANITY_CHECK_KEYPOINTS(SURF, d_keypoints);
}
else
{
cv::SURF surf;
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
surf(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
surf(img, cv::noArray(), keypoints, descriptors);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK(descriptors, 1e-4);
}
}
//////////////////////////////////////////////////////////////////////
// FAST
PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::FAST_GPU d_fast(20);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints;
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
TEST_CYCLE()
{
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
}
GPU_SANITY_CHECK_RESPONSE(FAST, d_keypoints);
}
else
{
std::vector<cv::KeyPoint> keypoints;
cv::FAST(img, keypoints, 20);
TEST_CYCLE()
{
keypoints.clear();
cv::FAST(img, keypoints, 20);
}
SANITY_CHECK_KEYPOINTS(keypoints);
}
}
//////////////////////////////////////////////////////////////////////
// ORB
PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::ORB_GPU d_orb(4000);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
GPU_SANITY_CHECK_KEYPOINTS(ORB, d_keypoints);
GPU_SANITY_CHECK(d_descriptors);
}
else
{
cv::ORB orb(4000);
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
orb(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
orb(img, cv::noArray(), keypoints, descriptors);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK(descriptors);
}
}
//////////////////////////////////////////////////////////////////////
// BFMatch
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(20.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE()
{
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
}
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
}
else
{
cv::BFMatcher matcher(normType);
std::vector<cv::DMatch> matches;
matcher.match(query, train, matches);
TEST_CYCLE()
{
matcher.match(query, train, matches);
}
SANITY_CHECK(matches);
}
}
//////////////////////////////////////////////////////////////////////
// BFKnnMatch
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
Values(64, 128, 256),
Values(2, 3),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(30.0);
int desc_size = GET_PARAM(0);
int k = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE()
{
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
}
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(query, train, matches, k);
TEST_CYCLE()
{
matcher.knnMatch(query, train, matches, k);
}
SANITY_CHECK(matches);
}
}
//////////////////////////////////////////////////////////////////////
// BFRadiusMatch
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(30.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query, 0.0, 1.0);
cv::Mat train(3000, desc_size, type);
fillRandom(train, 0.0, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
TEST_CYCLE()
{
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
}
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(query, train, matches, 2.0);
TEST_CYCLE()
{
matcher.radiusMatch(query, train, matches, 2.0);
}
SANITY_CHECK(matches);
}
}
} // namespace
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SURF
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, Features2D_SURF, Values<string>("gpu/perf/aloe.png"))
{
declare.time(50.0);
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::SURF_GPU d_surf;
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
GPU_SANITY_CHECK(d_descriptors, 1e-4);
GPU_SANITY_CHECK_KEYPOINTS(SURF, d_keypoints);
}
else
{
cv::SURF surf;
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
surf(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
surf(img, cv::noArray(), keypoints, descriptors);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK(descriptors, 1e-4);
}
}
//////////////////////////////////////////////////////////////////////
// FAST
PERF_TEST_P(Image, Features2D_FAST, Values<string>("gpu/perf/aloe.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::FAST_GPU d_fast(20);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints;
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
TEST_CYCLE()
{
d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
}
GPU_SANITY_CHECK_RESPONSE(FAST, d_keypoints);
}
else
{
std::vector<cv::KeyPoint> keypoints;
cv::FAST(img, keypoints, 20);
TEST_CYCLE()
{
keypoints.clear();
cv::FAST(img, keypoints, 20);
}
SANITY_CHECK_KEYPOINTS(keypoints);
}
}
//////////////////////////////////////////////////////////////////////
// ORB
PERF_TEST_P(Image, Features2D_ORB, Values<string>("gpu/perf/aloe.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::ORB_GPU d_orb(4000);
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_keypoints, d_descriptors;
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
TEST_CYCLE()
{
d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
}
GPU_SANITY_CHECK_KEYPOINTS(ORB, d_keypoints);
GPU_SANITY_CHECK(d_descriptors);
}
else
{
cv::ORB orb(4000);
std::vector<cv::KeyPoint> keypoints;
cv::Mat descriptors;
orb(img, cv::noArray(), keypoints, descriptors);
TEST_CYCLE()
{
keypoints.clear();
orb(img, cv::noArray(), keypoints, descriptors);
}
SANITY_CHECK_KEYPOINTS(keypoints);
SANITY_CHECK(descriptors);
}
}
//////////////////////////////////////////////////////////////////////
// BFMatch
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(20.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
TEST_CYCLE()
{
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
}
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
}
else
{
cv::BFMatcher matcher(normType);
std::vector<cv::DMatch> matches;
matcher.match(query, train, matches);
TEST_CYCLE()
{
matcher.match(query, train, matches);
}
SANITY_CHECK(matches);
}
}
//////////////////////////////////////////////////////////////////////
// BFKnnMatch
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch, Combine(
Values(64, 128, 256),
Values(2, 3),
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(30.0);
int desc_size = GET_PARAM(0);
int k = GET_PARAM(1);
int normType = GET_PARAM(2);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query);
cv::Mat train(3000, desc_size, type);
fillRandom(train);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
TEST_CYCLE()
{
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
}
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(query, train, matches, k);
TEST_CYCLE()
{
matcher.knnMatch(query, train, matches, k);
}
SANITY_CHECK(matches);
}
}
//////////////////////////////////////////////////////////////////////
// BFRadiusMatch
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch, Combine(Values(64, 128, 256), Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
{
declare.time(30.0);
int desc_size = GET_PARAM(0);
int normType = GET_PARAM(1);
int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
cv::Mat query(3000, desc_size, type);
fillRandom(query, 0.0, 1.0);
cv::Mat train(3000, desc_size, type);
fillRandom(train, 0.0, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::BFMatcher_GPU d_matcher(normType);
cv::gpu::GpuMat d_query(query);
cv::gpu::GpuMat d_train(train);
cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
TEST_CYCLE()
{
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, 2.0);
}
GPU_SANITY_CHECK(d_trainIdx);
GPU_SANITY_CHECK(d_distance);
}
else
{
cv::BFMatcher matcher(normType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(query, train, matches, 2.0);
TEST_CYCLE()
{
matcher.radiusMatch(query, train, matches, 2.0);
}
SANITY_CHECK(matches);
}
}
} // namespace

View File

@@ -1,415 +1,415 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// Blur
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::blur(src, dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::blur(src, dst, cv::Size(ksize, ksize));
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Sobel
PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
TEST_CYCLE()
{
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::Sobel(src, dst, -1, 1, 1, ksize);
TEST_CYCLE()
{
cv::Sobel(src, dst, -1, 1, 1, ksize);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Scharr
PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
TEST_CYCLE()
{
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::Scharr(src, dst, -1, 1, 0);
TEST_CYCLE()
{
cv::Scharr(src, dst, -1, 1, 0);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// GaussianBlur
PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
TEST_CYCLE()
{
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
TEST_CYCLE()
{
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Laplacian
PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
TEST_CYCLE()
{
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::Laplacian(src, dst, -1, ksize);
TEST_CYCLE()
{
cv::Laplacian(src, dst, -1, ksize);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Erode
PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::erode(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::erode(d_src, d_dst, ker, d_buf);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::erode(src, dst, ker);
TEST_CYCLE()
{
cv::erode(src, dst, ker);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Dilate
PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::dilate(src, dst, ker);
TEST_CYCLE()
{
cv::dilate(src, dst, ker);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// MorphologyEx
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS ValuesIn(MorphOp::all())
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), ALL_MORPH_OPS))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int morphOp = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf1;
cv::gpu::GpuMat d_buf2;
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
TEST_CYCLE()
{
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::morphologyEx(src, dst, morphOp, ker);
TEST_CYCLE()
{
cv::morphologyEx(src, dst, morphOp, ker);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Filter2D
PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat kernel(ksize, ksize, CV_32FC1);
fillRandom(kernel, 0.0, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
TEST_CYCLE()
{
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::filter2D(src, dst, -1, kernel);
TEST_CYCLE()
{
cv::filter2D(src, dst, -1, kernel);
}
CPU_SANITY_CHECK(dst);
}
}
} // namespace
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// Blur
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), Values(3, 5, 7)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::gpu::blur(d_src, d_dst, cv::Size(ksize, ksize));
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::blur(src, dst, cv::Size(ksize, ksize));
TEST_CYCLE()
{
cv::blur(src, dst, cv::Size(ksize, ksize));
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Sobel
PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
TEST_CYCLE()
{
cv::gpu::Sobel(d_src, d_dst, -1, 1, 1, d_buf, ksize);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::Sobel(src, dst, -1, 1, 1, ksize);
TEST_CYCLE()
{
cv::Sobel(src, dst, -1, 1, 1, ksize);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Scharr
PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
TEST_CYCLE()
{
cv::gpu::Scharr(d_src, d_dst, -1, 1, 0, d_buf);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::Scharr(src, dst, -1, 1, 0);
TEST_CYCLE()
{
cv::Scharr(src, dst, -1, 1, 0);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// GaussianBlur
PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
TEST_CYCLE()
{
cv::gpu::GaussianBlur(d_src, d_dst, cv::Size(ksize, ksize), d_buf, 0.5);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
TEST_CYCLE()
{
cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Laplacian
PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
TEST_CYCLE()
{
cv::gpu::Laplacian(d_src, d_dst, -1, ksize);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::Laplacian(src, dst, -1, ksize);
TEST_CYCLE()
{
cv::Laplacian(src, dst, -1, ksize);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Erode
PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::erode(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::erode(d_src, d_dst, ker, d_buf);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::erode(src, dst, ker);
TEST_CYCLE()
{
cv::erode(src, dst, ker);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Dilate
PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf;
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
TEST_CYCLE()
{
cv::gpu::dilate(d_src, d_dst, ker, d_buf);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::dilate(src, dst, ker);
TEST_CYCLE()
{
cv::dilate(src, dst, ker);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// MorphologyEx
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
#define ALL_MORPH_OPS ValuesIn(MorphOp::all())
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), ALL_MORPH_OPS))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int morphOp = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::GpuMat d_buf1;
cv::gpu::GpuMat d_buf2;
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
TEST_CYCLE()
{
cv::gpu::morphologyEx(d_src, d_dst, morphOp, ker, d_buf1, d_buf2);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::morphologyEx(src, dst, morphOp, ker);
TEST_CYCLE()
{
cv::morphologyEx(src, dst, morphOp, ker);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Filter2D
PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
cv::Size size = GET_PARAM(0);
int type = GET_PARAM(1);
int ksize = GET_PARAM(2);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat kernel(ksize, ksize, CV_32FC1);
fillRandom(kernel, 0.0, 1.0);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
TEST_CYCLE()
{
cv::gpu::filter2D(d_src, d_dst, -1, kernel);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
cv::filter2D(src, dst, -1, kernel);
TEST_CYCLE()
{
cv::filter2D(src, dst, -1, kernel);
}
CPU_SANITY_CHECK(dst);
}
}
} // namespace

File diff suppressed because it is too large Load Diff

View File

@@ -32,8 +32,8 @@ struct GreedyLabeling
return lo <= d && d <= hi;
}
private:
InInterval& operator=(const InInterval&);
};

View File

@@ -1,185 +1,185 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SetTo
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Scalar val(1, 2, 3, 4);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(size, type);
d_src.setTo(val);
TEST_CYCLE()
{
d_src.setTo(val);
}
GPU_SANITY_CHECK(d_src);
}
else
{
cv::Mat src(size, type);
src.setTo(val);
TEST_CYCLE()
{
src.setTo(val);
}
CPU_SANITY_CHECK(src);
}
}
//////////////////////////////////////////////////////////////////////
// SetToMasked
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
cv::Scalar val(1, 2, 3, 4);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
d_src.setTo(val, d_mask);
TEST_CYCLE()
{
d_src.setTo(val, d_mask);
}
GPU_SANITY_CHECK(d_src);
}
else
{
src.setTo(val, mask);
TEST_CYCLE()
{
src.setTo(val, mask);
}
CPU_SANITY_CHECK(src);
}
}
//////////////////////////////////////////////////////////////////////
// CopyToMasked
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat d_dst;
d_src.copyTo(d_dst, d_mask);
TEST_CYCLE()
{
d_src.copyTo(d_dst, d_mask);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
src.copyTo(dst, mask);
TEST_CYCLE()
{
src.copyTo(dst, mask);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// ConvertTo
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
{
cv::Size size = GET_PARAM(0);
int depth1 = GET_PARAM(1);
int depth2 = GET_PARAM(2);
cv::Mat src(size, depth1);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
src.convertTo(dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
src.convertTo(dst, depth2, 0.5, 1.0);
}
CPU_SANITY_CHECK(dst);
}
}
} // namespace
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
//////////////////////////////////////////////////////////////////////
// SetTo
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Scalar val(1, 2, 3, 4);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(size, type);
d_src.setTo(val);
TEST_CYCLE()
{
d_src.setTo(val);
}
GPU_SANITY_CHECK(d_src);
}
else
{
cv::Mat src(size, type);
src.setTo(val);
TEST_CYCLE()
{
src.setTo(val);
}
CPU_SANITY_CHECK(src);
}
}
//////////////////////////////////////////////////////////////////////
// SetToMasked
PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
cv::Scalar val(1, 2, 3, 4);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
d_src.setTo(val, d_mask);
TEST_CYCLE()
{
d_src.setTo(val, d_mask);
}
GPU_SANITY_CHECK(d_src);
}
else
{
src.setTo(val, mask);
TEST_CYCLE()
{
src.setTo(val, mask);
}
CPU_SANITY_CHECK(src);
}
}
//////////////////////////////////////////////////////////////////////
// CopyToMasked
PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), GPU_CHANNELS_1_3_4))
{
cv::Size size = GET_PARAM(0);
int depth = GET_PARAM(1);
int channels = GET_PARAM(2);
int type = CV_MAKE_TYPE(depth, channels);
cv::Mat src(size, type);
fillRandom(src);
cv::Mat mask(size, CV_8UC1);
fillRandom(mask, 0, 2);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_mask(mask);
cv::gpu::GpuMat d_dst;
d_src.copyTo(d_dst, d_mask);
TEST_CYCLE()
{
d_src.copyTo(d_dst, d_mask);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
src.copyTo(dst, mask);
TEST_CYCLE()
{
src.copyTo(dst, mask);
}
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// ConvertTo
DEF_PARAM_TEST(Sz_2Depth, cv::Size, MatDepth, MatDepth);
PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U, CV_16U, CV_32F, CV_64F), Values(CV_8U, CV_16U, CV_32F, CV_64F)))
{
cv::Size size = GET_PARAM(0);
int depth1 = GET_PARAM(1);
int depth2 = GET_PARAM(2);
cv::Mat src(size, depth1);
fillRandom(src);
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat d_dst;
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
d_src.convertTo(d_dst, depth2, 0.5, 1.0);
}
GPU_SANITY_CHECK(d_dst);
}
else
{
cv::Mat dst;
src.convertTo(dst, depth2, 0.5, 1.0);
TEST_CYCLE()
{
src.convertTo(dst, depth2, 0.5, 1.0);
}
CPU_SANITY_CHECK(dst);
}
}
} // namespace

View File

@@ -1,184 +1,184 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
///////////////////////////////////////////////////////////////
// HOG
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::Rect> found_locations;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
else
{
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
SANITY_CHECK(found_locations);
}
//===========test for CalTech data =============//
DEF_PARAM_TEST_1(HOG, string);
PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
"gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
"gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::Rect> found_locations;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
else
{
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
SANITY_CHECK(found_locations);
}
///////////////////////////////////////////////////////////////
// HaarClassifier
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_objects_buffer;
d_cascade.detectMultiScale(d_img, d_objects_buffer);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_objects_buffer);
}
GPU_SANITY_CHECK(d_objects_buffer);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
CPU_SANITY_CHECK(rects);
}
}
///////////////////////////////////////////////////////////////
// LBP cascade
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_gpu_rects;
d_cascade.detectMultiScale(d_img, d_gpu_rects);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_gpu_rects);
}
GPU_SANITY_CHECK(d_gpu_rects);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
CPU_SANITY_CHECK(rects);
}
}
} // namespace
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
namespace {
///////////////////////////////////////////////////////////////
// HOG
DEF_PARAM_TEST_1(Image, string);
PERF_TEST_P(Image, ObjDetect_HOG, Values<string>("gpu/hog/road.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::Rect> found_locations;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
else
{
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
SANITY_CHECK(found_locations);
}
//===========test for CalTech data =============//
DEF_PARAM_TEST_1(HOG, string);
PERF_TEST_P(HOG, CalTech, Values<string>("gpu/caltech/image_00000009_0.png", "gpu/caltech/image_00000032_0.png",
"gpu/caltech/image_00000165_0.png", "gpu/caltech/image_00000261_0.png", "gpu/caltech/image_00000469_0.png",
"gpu/caltech/image_00000527_0.png", "gpu/caltech/image_00000574_0.png"))
{
cv::Mat img = readImage(GetParam(), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
std::vector<cv::Rect> found_locations;
if (PERF_RUN_GPU())
{
cv::gpu::GpuMat d_img(img);
cv::gpu::HOGDescriptor d_hog;
d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
d_hog.detectMultiScale(d_img, found_locations);
TEST_CYCLE()
{
d_hog.detectMultiScale(d_img, found_locations);
}
}
else
{
cv::HOGDescriptor hog;
hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
hog.detectMultiScale(img, found_locations);
TEST_CYCLE()
{
hog.detectMultiScale(img, found_locations);
}
}
SANITY_CHECK(found_locations);
}
///////////////////////////////////////////////////////////////
// HaarClassifier
typedef pair<string, string> pair_string;
DEF_PARAM_TEST_1(ImageAndCascade, pair_string);
PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/perf/haarcascade_frontalface_alt.xml")))
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_objects_buffer;
d_cascade.detectMultiScale(d_img, d_objects_buffer);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_objects_buffer);
}
GPU_SANITY_CHECK(d_objects_buffer);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
CPU_SANITY_CHECK(rects);
}
}
///////////////////////////////////////////////////////////////
// LBP cascade
PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
Values<pair_string>(make_pair("gpu/haarcascade/group_1_640x480_VGA.pgm", "gpu/lbpcascade/lbpcascade_frontalface.xml")))
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (PERF_RUN_GPU())
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
cv::gpu::GpuMat d_img(img);
cv::gpu::GpuMat d_gpu_rects;
d_cascade.detectMultiScale(d_img, d_gpu_rects);
TEST_CYCLE()
{
d_cascade.detectMultiScale(d_img, d_gpu_rects);
}
GPU_SANITY_CHECK(d_gpu_rects);
}
else
{
cv::CascadeClassifier cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
std::vector<cv::Rect> rects;
cascade.detectMultiScale(img, rects);
TEST_CYCLE()
{
cascade.detectMultiScale(img, rects);
}
CPU_SANITY_CHECK(rects);
}
}
} // namespace

View File

@@ -1,37 +1,37 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# pragma GCC diagnostic ignored "-Wmissing-prototypes" //OSX
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include <cstdio>
#include <iostream>
#include "cvconfig.h"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#endif
#include "opencv2/ts/ts.hpp"
#include "opencv2/ts/ts_perf.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/legacy/legacy.hpp"
#include "opencv2/photo/photo.hpp"
#include "utility.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,191 +1,191 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
void fillRandom(Mat& m, double a, double b)
{
RNG rng(123456789);
rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
}
Mat readImage(const string& fileName, int flags)
{
return imread(perf::TestBase::getDataPath(fileName), flags);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
{
"BGR2BGRA",
"BGRA2BGR",
"BGR2RGBA",
"RGBA2BGR",
"BGR2RGB",
"BGRA2RGBA",
"BGR2GRAY",
"RGB2GRAY",
"GRAY2BGR",
"GRAY2BGRA",
"BGRA2GRAY",
"RGBA2GRAY",
"BGR2BGR565",
"RGB2BGR565",
"BGR5652BGR",
"BGR5652RGB",
"BGRA2BGR565",
"RGBA2BGR565",
"BGR5652BGRA",
"BGR5652RGBA",
"GRAY2BGR565",
"BGR5652GRAY",
"BGR2BGR555",
"RGB2BGR555",
"BGR5552BGR",
"BGR5552RGB",
"BGRA2BGR555",
"RGBA2BGR555",
"BGR5552BGRA",
"BGR5552RGBA",
"GRAY2BGR555",
"BGR5552GRAY",
"BGR2XYZ",
"RGB2XYZ",
"XYZ2BGR",
"XYZ2RGB",
"BGR2YCrCb",
"RGB2YCrCb",
"YCrCb2BGR",
"YCrCb2RGB",
"BGR2HSV",
"RGB2HSV",
"",
"",
"BGR2Lab",
"RGB2Lab",
"BayerBG2BGR",
"BayerGB2BGR",
"BayerRG2BGR",
"BayerGR2BGR",
"BGR2Luv",
"RGB2Luv",
"BGR2HLS",
"RGB2HLS",
"HSV2BGR",
"HSV2RGB",
"Lab2BGR",
"Lab2RGB",
"Luv2BGR",
"Luv2RGB",
"HLS2BGR",
"HLS2RGB",
"BayerBG2BGR_VNG",
"BayerGB2BGR_VNG",
"BayerRG2BGR_VNG",
"BayerGR2BGR_VNG",
"BGR2HSV_FULL",
"RGB2HSV_FULL",
"BGR2HLS_FULL",
"RGB2HLS_FULL",
"HSV2BGR_FULL",
"HSV2RGB_FULL",
"HLS2BGR_FULL",
"HLS2RGB_FULL",
"LBGR2Lab",
"LRGB2Lab",
"LBGR2Luv",
"LRGB2Luv",
"Lab2LBGR",
"Lab2LRGB",
"Luv2LBGR",
"Luv2LRGB",
"BGR2YUV",
"RGB2YUV",
"YUV2BGR",
"YUV2RGB",
"BayerBG2GRAY",
"BayerGB2GRAY",
"BayerRG2GRAY",
"BayerGR2GRAY",
//YUV 4:2:0 formats family
"YUV2RGB_NV12",
"YUV2BGR_NV12",
"YUV2RGB_NV21",
"YUV2BGR_NV21",
"YUV2RGBA_NV12",
"YUV2BGRA_NV12",
"YUV2RGBA_NV21",
"YUV2BGRA_NV21",
"YUV2RGB_YV12",
"YUV2BGR_YV12",
"YUV2RGB_IYUV",
"YUV2BGR_IYUV",
"YUV2RGBA_YV12",
"YUV2BGRA_YV12",
"YUV2RGBA_IYUV",
"YUV2BGRA_IYUV",
"YUV2GRAY_420",
//YUV 4:2:2 formats family
"YUV2RGB_UYVY",
"YUV2BGR_UYVY",
"YUV2RGB_VYUY",
"YUV2BGR_VYUY",
"YUV2RGBA_UYVY",
"YUV2BGRA_UYVY",
"YUV2RGBA_VYUY",
"YUV2BGRA_VYUY",
"YUV2RGB_YUY2",
"YUV2BGR_YUY2",
"YUV2RGB_YVYU",
"YUV2BGR_YVYU",
"YUV2RGBA_YUY2",
"YUV2BGRA_YUY2",
"YUV2RGBA_YVYU",
"YUV2BGRA_YVYU",
"YUV2GRAY_UYVY",
"YUV2GRAY_YUY2",
// alpha premultiplication
"RGBA2mRGBA",
"mRGBA2RGBA",
"COLORCVT_MAX"
};
*os << str[info.code];
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
void fillRandom(Mat& m, double a, double b)
{
RNG rng(123456789);
rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
}
Mat readImage(const string& fileName, int flags)
{
return imread(perf::TestBase::getDataPath(fileName), flags);
}
void PrintTo(const CvtColorInfo& info, ostream* os)
{
static const char* str[] =
{
"BGR2BGRA",
"BGRA2BGR",
"BGR2RGBA",
"RGBA2BGR",
"BGR2RGB",
"BGRA2RGBA",
"BGR2GRAY",
"RGB2GRAY",
"GRAY2BGR",
"GRAY2BGRA",
"BGRA2GRAY",
"RGBA2GRAY",
"BGR2BGR565",
"RGB2BGR565",
"BGR5652BGR",
"BGR5652RGB",
"BGRA2BGR565",
"RGBA2BGR565",
"BGR5652BGRA",
"BGR5652RGBA",
"GRAY2BGR565",
"BGR5652GRAY",
"BGR2BGR555",
"RGB2BGR555",
"BGR5552BGR",
"BGR5552RGB",
"BGRA2BGR555",
"RGBA2BGR555",
"BGR5552BGRA",
"BGR5552RGBA",
"GRAY2BGR555",
"BGR5552GRAY",
"BGR2XYZ",
"RGB2XYZ",
"XYZ2BGR",
"XYZ2RGB",
"BGR2YCrCb",
"RGB2YCrCb",
"YCrCb2BGR",
"YCrCb2RGB",
"BGR2HSV",
"RGB2HSV",
"",
"",
"BGR2Lab",
"RGB2Lab",
"BayerBG2BGR",
"BayerGB2BGR",
"BayerRG2BGR",
"BayerGR2BGR",
"BGR2Luv",
"RGB2Luv",
"BGR2HLS",
"RGB2HLS",
"HSV2BGR",
"HSV2RGB",
"Lab2BGR",
"Lab2RGB",
"Luv2BGR",
"Luv2RGB",
"HLS2BGR",
"HLS2RGB",
"BayerBG2BGR_VNG",
"BayerGB2BGR_VNG",
"BayerRG2BGR_VNG",
"BayerGR2BGR_VNG",
"BGR2HSV_FULL",
"RGB2HSV_FULL",
"BGR2HLS_FULL",
"RGB2HLS_FULL",
"HSV2BGR_FULL",
"HSV2RGB_FULL",
"HLS2BGR_FULL",
"HLS2RGB_FULL",
"LBGR2Lab",
"LRGB2Lab",
"LBGR2Luv",
"LRGB2Luv",
"Lab2LBGR",
"Lab2LRGB",
"Luv2LBGR",
"Luv2LRGB",
"BGR2YUV",
"RGB2YUV",
"YUV2BGR",
"YUV2RGB",
"BayerBG2GRAY",
"BayerGB2GRAY",
"BayerRG2GRAY",
"BayerGR2GRAY",
//YUV 4:2:0 formats family
"YUV2RGB_NV12",
"YUV2BGR_NV12",
"YUV2RGB_NV21",
"YUV2BGR_NV21",
"YUV2RGBA_NV12",
"YUV2BGRA_NV12",
"YUV2RGBA_NV21",
"YUV2BGRA_NV21",
"YUV2RGB_YV12",
"YUV2BGR_YV12",
"YUV2RGB_IYUV",
"YUV2BGR_IYUV",
"YUV2RGBA_YV12",
"YUV2BGRA_YV12",
"YUV2RGBA_IYUV",
"YUV2BGRA_IYUV",
"YUV2GRAY_420",
//YUV 4:2:2 formats family
"YUV2RGB_UYVY",
"YUV2BGR_UYVY",
"YUV2RGB_VYUY",
"YUV2BGR_VYUY",
"YUV2RGBA_UYVY",
"YUV2BGRA_UYVY",
"YUV2RGBA_VYUY",
"YUV2BGRA_VYUY",
"YUV2RGB_YUY2",
"YUV2BGR_YUY2",
"YUV2RGB_YVYU",
"YUV2BGR_YVYU",
"YUV2RGBA_YUY2",
"YUV2BGRA_YUY2",
"YUV2RGBA_YVYU",
"YUV2BGRA_YVYU",
"YUV2GRAY_UYVY",
"YUV2GRAY_YUY2",
// alpha premultiplication
"RGBA2mRGBA",
"mRGBA2RGBA",
"COLORCVT_MAX"
};
*os << str[info.code];
}
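PrintTo() above is picked up by Google Test through argument-dependent lookup, so a colour-conversion parameter appears in the test listing under its name rather than as raw bytes. A hedged illustration (the chosen conversion is arbitrary):

    // Illustrative only: gtest calls PrintTo() when formatting the test parameter.
    CvtColorInfo info(4, 4, cv::COLOR_BGRA2RGBA);
    PrintTo(info, &std::cout);   // prints "BGRA2RGBA" (index 5 in the table above)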

View File

@@ -1,84 +1,84 @@
#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
#define __OPENCV_PERF_GPU_UTILITY_HPP__
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ts/ts_perf.hpp"
void fillRandom(cv::Mat& m, double a = 0.0, double b = 255.0);
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
using perf::MatType;
using perf::MatDepth;
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_MODES testing::ValuesIn(BorderMode::all())
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
#define ALL_INTERPOLATIONS testing::ValuesIn(Interpolation::all())
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
const int Gray = 1, TwoChannel = 2, BGR = 3, BGRA = 4;
CV_ENUM(MatCn, Gray, TwoChannel, BGR, BGRA)
#define GPU_CHANNELS_1_3_4 testing::Values(Gray, BGR, BGRA)
#define GPU_CHANNELS_1_3 testing::Values(Gray, BGR)
struct CvtColorInfo
{
int scn;
int dcn;
int code;
explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
};
void PrintTo(const CvtColorInfo& info, std::ostream* os);
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
DEF_PARAM_TEST_1(Sz, cv::Size);
typedef perf::Size_MatType Sz_Type;
DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth);
DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn);
#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p)
#define GPU_SANITY_CHECK(dmat, ...) \
do{ \
cv::Mat d##dmat(dmat); \
SANITY_CHECK(d##dmat, ## __VA_ARGS__); \
} while(0)
#define CPU_SANITY_CHECK(cmat, ...) \
do{ \
SANITY_CHECK(cmat, ## __VA_ARGS__); \
} while(0)
#define GPU_SANITY_CHECK_KEYPOINTS(alg, dmat, ...) \
do{ \
cv::Mat d##dmat(dmat); \
cv::Mat __pt_x = d##dmat.row(cv::gpu::alg##_GPU::X_ROW); \
cv::Mat __pt_y = d##dmat.row(cv::gpu::alg##_GPU::Y_ROW); \
cv::Mat __angle = d##dmat.row(cv::gpu::alg##_GPU::ANGLE_ROW); \
cv::Mat __octave = d##dmat.row(cv::gpu::alg##_GPU::OCTAVE_ROW); \
cv::Mat __size = d##dmat.row(cv::gpu::alg##_GPU::SIZE_ROW); \
::perf::Regression::add(this, std::string(#dmat) + "-pt-x-row", __pt_x, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "-pt-y-row", __pt_y, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "-angle-row", __angle, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "octave-row", __octave, ## __VA_ARGS__); \
::perf::Regression::add(this, std::string(#dmat) + "-pt-size-row", __size, ## __VA_ARGS__); \
} while(0)
#define GPU_SANITY_CHECK_RESPONSE(alg, dmat, ...) \
do{ \
cv::Mat d##dmat(dmat); \
cv::Mat __response = d##dmat.row(cv::gpu::alg##_GPU::RESPONSE_ROW); \
::perf::Regression::add(this, std::string(#dmat) + "-response-row", __response, ## __VA_ARGS__); \
} while(0)
#define FAIL_NO_CPU() FAIL() << "No such CPU implementation analogy"
#endif // __OPENCV_PERF_GPU_UTILITY_HPP__
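The fixtures and size lists above are what the per-module perf sources build on; a minimal hedged sketch of a test defined with them (the test name and the trivial copy used as the timed operation are illustrative, not part of this commit):

    PERF_TEST_P(Sz_Depth_Cn, Sketch_NoOpCopy,
        testing::Combine(GPU_TYPICAL_MAT_SIZES, testing::Values(MatDepth(CV_8U)), GPU_CHANNELS_1_3_4))
    {
        const cv::Size size = GET_PARAM(0);
        const int depth = GET_PARAM(1);
        const int channels = GET_PARAM(2);
        cv::Mat src(size, CV_MAKETYPE(depth, channels));
        fillRandom(src);                       // helper declared above
        cv::Mat dst;
        TEST_CYCLE()
        {
            src.copyTo(dst);                   // stand-in for the operation under test
        }
        CPU_SANITY_CHECK(dst);
    }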

File diff suppressed because it is too large Load Diff

View File

@@ -1,158 +1,158 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_nogpu(); }
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_nogpu(); }
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace device
{
namespace disp_bilateral_filter
{
void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc);
template<typename T>
void disp_bilateral_filter(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
}
}}}
using namespace ::cv::gpu::device::disp_bilateral_filter;
namespace
{
const float DEFAULT_EDGE_THRESHOLD = 0.1f;
const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f;
const float DEFAULT_SIGMA_RANGE = 10.0f;
inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len)
{
Mat cpu_table_color(1, len, CV_32F);
float* line = cpu_table_color.ptr<float>();
for(int i = 0; i < len; i++)
line[i] = static_cast<float>(std::exp(-double(i * i) / (2 * sigma_range * sigma_range)));
table_color.upload(cpu_table_color);
}
inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space)
{
int half = (win_size >> 1);
Mat cpu_table_space(half + 1, half + 1, CV_32F);
for (int y = 0; y <= half; ++y)
{
float* row = cpu_table_space.ptr<float>(y);
for (int x = 0; x <= half; ++x)
row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space);
}
table_space.upload(cpu_table_space);
}
template <typename T>
void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
GpuMat& table_color, GpuMat& table_space,
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
{
short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
short max_disc = short(ndisp * max_disc_threshold + 0.5);
disp_load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
if (&dst != &disp)
{
if (stream)
stream.enqueueCopy(disp, dst);
else
disp.copyTo(dst);
}
disp_bilateral_filter<T>(dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
}
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
GpuMat& table_color, GpuMat& table_space,
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream);
const bilateral_filter_operator_t operators[] =
{disp_bilateral_filter_operator<unsigned char>, 0, 0, disp_bilateral_filter_operator<short>, 0, 0, 0, 0};
}
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_)
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD),
sigma_range(DEFAULT_SIGMA_RANGE)
{
calc_color_weighted_table(table_color, sigma_range, 255);
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
}
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_,
float max_disc_threshold_, float sigma_range_)
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_),
sigma_range(sigma_range_)
{
calc_color_weighted_table(table_color, sigma_range, 255);
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
}
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
{
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream);
}
#endif /* !defined (HAVE_CUDA) */
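A hedged usage sketch of the filter defined above (sizes are illustrative; the disparity would normally come from one of the gpu stereo matchers, and the Stream argument is assumed to default to the null stream):

    cv::Mat disp_host(480, 640, CV_8U);        // e.g. raw disparity from a stereo matcher
    cv::Mat left_host(480, 640, CV_8UC3);      // colour guide image (CV_8UC1 is also accepted)
    // ... fill disp_host / left_host ...
    cv::gpu::GpuMat d_disp(disp_host), d_left(left_host), d_refined;
    cv::gpu::DisparityBilateralFilter dbf(64 /*ndisp*/, 3 /*radius*/, 1 /*iters*/);
    dbf(d_disp, d_left, d_refined);            // edge-preserving refinement of the disparity
    cv::Mat refined(d_refined);                // download the result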

View File

@@ -1,100 +1,100 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
#else
namespace cv { namespace gpu { namespace device
{
namespace blend
{
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
}
}}}
using namespace ::cv::gpu::device::blend;
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
GpuMat& result, Stream& stream)
{
CV_Assert(img1.size() == img2.size());
CV_Assert(img1.type() == img2.type());
CV_Assert(weights1.size() == img1.size());
CV_Assert(weights2.size() == img2.size());
CV_Assert(weights1.type() == CV_32F);
CV_Assert(weights2.type() == CV_32F);
const Size size = img1.size();
const int depth = img1.depth();
const int cn = img1.channels();
result.create(size, CV_MAKE_TYPE(depth, cn));
switch (depth)
{
case CV_8U:
if (cn != 4)
blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
else
blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
break;
case CV_32F:
blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
break;
default:
CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function");
}
}
#endif
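A hedged usage sketch of blendLinear (constant weight maps keep the example short; per-pixel CV_32F maps are the general case, and the Stream argument is assumed to default to the null stream):

    cv::Mat img1_host(480, 640, CV_8UC3, cv::Scalar::all(255));
    cv::Mat img2_host(480, 640, CV_8UC3, cv::Scalar::all(0));
    cv::Mat w1_host(480, 640, CV_32F, cv::Scalar::all(0.3));   // weight map for img1
    cv::Mat w2_host(480, 640, CV_32F, cv::Scalar::all(0.7));   // weight map for img2
    cv::gpu::GpuMat d_img1(img1_host), d_img2(img2_host), d_w1(w1_host), d_w2(w2_host), d_result;
    cv::gpu::blendLinear(d_img1, d_img2, d_w1, d_w2, d_result); // result = img1*w1 + img2*w2 per pixel
    cv::Mat blended(d_result);                                  // download the blended image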

File diff suppressed because it is too large Load Diff

View File

@@ -1,295 +1,295 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, vector<int>*) { throw_nogpu(); }
#else
namespace cv { namespace gpu { namespace device
{
namespace transform_points
{
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, PtrStepSz<float3> dst, cudaStream_t stream);
}
namespace project_points
{
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, const float* proj, PtrStepSz<float2> dst, cudaStream_t stream);
}
namespace solve_pnp_ransac
{
int maxNumIters();
void computeHypothesisScores(
const int num_hypotheses, const int num_points, const float* rot_matrices,
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores);
}
}}}
using namespace ::cv::gpu::device;
namespace
{
void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
{
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
// Convert rotation vector into matrix
Mat rot;
Rodrigues(rvec, rot);
dst.create(src.size(), src.type());
transform_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), dst, stream);
}
}
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
{
transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
}
namespace
{
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
{
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
CV_Assert(dist_coef.empty()); // Undistortion isn't supported
// Convert rotation vector into matrix
Mat rot;
Rodrigues(rvec, rot);
dst.create(src.size(), CV_32FC2);
project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst, stream);
}
}
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
{
projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
}
namespace
{
// Selects subset_size random different points from [0, num_points - 1] range
void selectRandom(int subset_size, int num_points, vector<int>& subset)
{
subset.resize(subset_size);
for (int i = 0; i < subset_size; ++i)
{
bool was;
do
{
subset[i] = rand() % num_points;
was = false;
for (int j = 0; j < i; ++j)
if (subset[j] == subset[i])
{
was = true;
break;
}
} while (was);
}
}
// Computes a rotation/translation pair for small subsets of the input data
class TransformHypothesesGenerator
{
public:
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
const Mat& camera_mat_, int num_points_, int subset_size_,
Mat rot_matrices_, Mat transl_vectors_)
: object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_),
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
transl_vectors(transl_vectors_) {}
void operator()(const BlockedRange& range) const
{
// Input data for generation of the current hypothesis
vector<int> subset_indices(subset_size);
Mat_<Point3f> object_subset(1, subset_size);
Mat_<Point2f> image_subset(1, subset_size);
// Current hypothesis data
Mat rot_vec(1, 3, CV_64F);
Mat rot_mat(3, 3, CV_64F);
Mat transl_vec(1, 3, CV_64F);
for (int iter = range.begin(); iter < range.end(); ++iter)
{
selectRandom(subset_size, num_points, subset_indices);
for (int i = 0; i < subset_size; ++i)
{
object_subset(0, i) = object->at<Point3f>(subset_indices[i]);
image_subset(0, i) = image->at<Point2f>(subset_indices[i]);
}
solvePnP(object_subset, image_subset, *camera_mat, *dist_coef, rot_vec, transl_vec);
// Remember translation vector
Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
transl_vec = transl_vec.reshape(0, 1);
transl_vec.convertTo(transl_vec_, CV_32F);
// Remember rotation matrix
Rodrigues(rot_vec, rot_mat);
Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
rot_mat.convertTo(rot_mat_, CV_32F);
}
}
const Mat* object;
const Mat* image;
const Mat* dist_coef;
const Mat* camera_mat;
int num_points;
int subset_size;
// Hypotheses storage (global)
Mat rot_matrices;
Mat transl_vectors;
};
}
void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
int num_iters, float max_dist, int min_inlier_count,
vector<int>* inliers)
{
(void)min_inlier_count;
CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
CV_Assert(object.cols == image.cols);
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
CV_Assert(!use_extrinsic_guess); // We don't support initial guess for now
CV_Assert(num_iters <= solve_pnp_ransac::maxNumIters());
const int subset_size = 4;
const int num_points = object.cols;
CV_Assert(num_points >= subset_size);
// Unapply distortion and intrinsic camera transformations
Mat eye_camera_mat = Mat::eye(3, 3, CV_32F);
Mat empty_dist_coef;
Mat image_normalized;
undistortPoints(image, image_normalized, camera_mat, dist_coef, Mat(), eye_camera_mat);
// Hypotheses storage (global)
Mat rot_matrices(1, num_iters * 9, CV_32F);
Mat transl_vectors(1, num_iters * 3, CV_32F);
// Generate set of hypotheses using small subsets of the input data
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
num_points, subset_size, rot_matrices, transl_vectors);
parallel_for(BlockedRange(0, num_iters), body);
// Compute scores (i.e. number of inliers) for each hypothesis
GpuMat d_object(object);
GpuMat d_image_normalized(image_normalized);
GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
solve_pnp_ransac::computeHypothesisScores(
num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist,
d_hypothesis_scores.ptr<int>());
// Find the best hypothesis index
Point best_idx;
double best_score;
minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
int num_inliers = static_cast<int>(best_score);
// Extract the best hypothesis data
Mat rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
Rodrigues(rot_mat, rvec);
rvec = rvec.reshape(0, 1);
tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
tvec = tvec.reshape(0, 1);
// Build vector of inlier indices
if (inliers != NULL)
{
inliers->clear();
inliers->reserve(num_inliers);
Point3f p, p_transf;
Point2f p_proj;
const float* rot = rot_mat.ptr<float>();
const float* transl = tvec.ptr<float>();
for (int i = 0; i < num_points; ++i)
{
p = object.at<Point3f>(0, i);
p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
p_proj.x = p_transf.x / p_transf.z;
p_proj.y = p_transf.y / p_transf.z;
if (norm(p_proj - image_normalized.at<Point2f>(0, i)) < max_dist)
inliers->push_back(i);
}
}
}
#endif
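A hedged usage sketch of the GPU solvePnPRansac above (point data, intrinsics and iteration count are illustrative; note the single-row layouts and the CV_32F camera matrix required by the assertions, and that an initial extrinsic guess is not supported):

    const int N = 100;
    cv::Mat object(1, N, CV_32FC3);                     // 3D model points
    cv::Mat image(1, N, CV_32FC2);                      // matching 2D projections
    // ... fill object / image ...
    cv::Mat camera_mat = (cv::Mat_<float>(3, 3) << 800.f, 0.f, 320.f,
                                                   0.f, 800.f, 240.f,
                                                   0.f, 0.f, 1.f);
    cv::Mat dist_coef;                                  // empty: no distortion model
    cv::Mat rvec, tvec;
    std::vector<int> inliers;
    cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec,
                            false /*use_extrinsic_guess*/, 100 /*num_iters*/,
                            8.0f /*max_dist*/, 100 /*min_inlier_count (unused)*/, &inliers);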

File diff suppressed because it is too large Load Diff

View File

@@ -1,139 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "cu_safe_call.h"
#ifdef HAVE_CUDA
namespace
{
#define error_entry(entry) { entry, #entry }
struct ErrorEntry
{
int code;
std::string str;
};
class ErrorEntryComparer
{
public:
inline ErrorEntryComparer(int code) : code_(code) {}
inline bool operator()(const ErrorEntry& e) const { return e.code == code_; }
private:
int code_;
};
std::string getErrorString(int code, const ErrorEntry* errors, size_t n)
{
size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
const std::string& msg = (idx != n) ? errors[idx].str : std::string("Unknown error code");
std::ostringstream ostr;
ostr << msg << " [Code = " << code << "]";
return ostr.str();
}
const ErrorEntry cu_errors [] =
{
error_entry( CUDA_SUCCESS ),
error_entry( CUDA_ERROR_INVALID_VALUE ),
error_entry( CUDA_ERROR_OUT_OF_MEMORY ),
error_entry( CUDA_ERROR_NOT_INITIALIZED ),
error_entry( CUDA_ERROR_DEINITIALIZED ),
error_entry( CUDA_ERROR_PROFILER_DISABLED ),
error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ),
error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ),
error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ),
error_entry( CUDA_ERROR_NO_DEVICE ),
error_entry( CUDA_ERROR_INVALID_DEVICE ),
error_entry( CUDA_ERROR_INVALID_IMAGE ),
error_entry( CUDA_ERROR_INVALID_CONTEXT ),
error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ),
error_entry( CUDA_ERROR_MAP_FAILED ),
error_entry( CUDA_ERROR_UNMAP_FAILED ),
error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ),
error_entry( CUDA_ERROR_ALREADY_MAPPED ),
error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ),
error_entry( CUDA_ERROR_ALREADY_ACQUIRED ),
error_entry( CUDA_ERROR_NOT_MAPPED ),
error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ),
error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ),
error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ),
error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ),
error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ),
error_entry( CUDA_ERROR_INVALID_SOURCE ),
error_entry( CUDA_ERROR_FILE_NOT_FOUND ),
error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ),
error_entry( CUDA_ERROR_OPERATING_SYSTEM ),
error_entry( CUDA_ERROR_INVALID_HANDLE ),
error_entry( CUDA_ERROR_NOT_FOUND ),
error_entry( CUDA_ERROR_NOT_READY ),
error_entry( CUDA_ERROR_LAUNCH_FAILED ),
error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ),
error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ),
error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ),
error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ),
error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ),
error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ),
error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ),
error_entry( CUDA_ERROR_ASSERT ),
error_entry( CUDA_ERROR_TOO_MANY_PEERS ),
error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ),
error_entry( CUDA_ERROR_UNKNOWN )
};
const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
}
std::string cv::gpu::detail::cuGetErrString(CUresult res)
{
return getErrorString(res, cu_errors, cu_errors_num);
}
#endif // HAVE_CUDA
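For reference, the lookup above is a plain linear std::find_if over a static table of { code, name } pairs, falling back to "Unknown error code" and always appending the numeric code. A minimal standalone sketch of the same pattern follows, with an illustrative two-entry table in place of the full cu_errors array (the numeric values match the CUDA driver API enum, where CUDA_SUCCESS is 0 and CUDA_ERROR_OUT_OF_MEMORY is 2):
#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
struct ErrorEntry { int code; std::string str; };
struct ErrorEntryComparer
{
    explicit ErrorEntryComparer(int code) : code_(code) {}
    bool operator()(const ErrorEntry& e) const { return e.code == code_; }
    int code_;
};
std::string getErrorString(int code, const ErrorEntry* errors, size_t n)
{
    const size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
    std::ostringstream ostr;
    ostr << ((idx != n) ? errors[idx].str : std::string("Unknown error code")) << " [Code = " << code << "]";
    return ostr.str();
}
int main()
{
    // Illustrative two-entry table, not the real cu_errors array.
    const ErrorEntry table[] = { { 0, "CUDA_SUCCESS" }, { 2, "CUDA_ERROR_OUT_OF_MEMORY" } };
    std::cout << getErrorString(2, table, 2) << std::endl;   // CUDA_ERROR_OUT_OF_MEMORY [Code = 2]
    std::cout << getErrorString(777, table, 2) << std::endl; // Unknown error code [Code = 777]
    return 0;
}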

View File

@@ -1,67 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __CU_SAFE_CALL_H__
#define __CU_SAFE_CALL_H__
#include "precomp.hpp"
#ifdef HAVE_CUDA
namespace cv { namespace gpu {
namespace detail
{
std::string cuGetErrString(CUresult res);
inline void cuSafeCall_impl(CUresult res, const char* file, int line)
{
if (res != CUDA_SUCCESS)
cv::error( cv::Exception(CV_GpuApiCallError, cuGetErrString(res), "unknown function", file, line) );
}
}
}}
#define cuSafeCall( op ) cv::gpu::detail::cuSafeCall_impl( (op), __FILE__, __LINE__ )
#endif // HAVE_CUDA
#endif // __CU_SAFE_CALL_H__
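As a usage sketch (not part of the header): the macro wraps raw CUDA driver API calls so that any failing CUresult becomes a cv::Exception carrying the decoded error string plus the call site. The example below assumes a translation unit that includes this header, is built with HAVE_CUDA, and gets the driver API declarations through precomp.hpp, as the header itself does:
#include "cu_safe_call.h"
#ifdef HAVE_CUDA
void exampleDriverApiInit()
{
    // Each cuSafeCall expands to cv::gpu::detail::cuSafeCall_impl(<call>, __FILE__, __LINE__),
    // which throws through cv::error whenever the result is not CUDA_SUCCESS.
    cuSafeCall( cuInit(0) );
    CUdevice dev = 0;
    cuSafeCall( cuDeviceGet(&dev, 0) );   // throws if no CUDA device with ordinal 0 exists
}
#endif // HAVE_CUDA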

View File

@@ -1,212 +1,212 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/*
NV12ToARGB color space conversion CUDA kernel
This sample uses CUDA to convert an NV12 (YUV 4:2:0 planar)
source frame to output in ARGB format
*/
#if !defined CUDA_DISABLER
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device {
namespace video_decoding
{
__constant__ uint constAlpha = ((uint)0xff << 24);
__constant__ float constHueColorSpaceMat[9];
void loadHueCSC(float hueCSC[9])
{
cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
}
__device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
{
float luma, chromaCb, chromaCr;
// Prepare for hue adjustment
luma = (float)yuvi[0];
chromaCb = (float)((int)yuvi[1] - 512.0f);
chromaCr = (float)((int)yuvi[2] - 512.0f);
// Convert YUV To RGB with hue adjustment
*red = (luma * constHueColorSpaceMat[0]) +
(chromaCb * constHueColorSpaceMat[1]) +
(chromaCr * constHueColorSpaceMat[2]);
*green = (luma * constHueColorSpaceMat[3]) +
(chromaCb * constHueColorSpaceMat[4]) +
(chromaCr * constHueColorSpaceMat[5]);
*blue = (luma * constHueColorSpaceMat[6]) +
(chromaCb * constHueColorSpaceMat[7]) +
(chromaCr * constHueColorSpaceMat[8]);
}
__device__ uint RGBAPACK_10bit(float red, float green, float blue, uint alpha)
{
uint ARGBpixel = 0;
// Clamp final 10 bit results
red = ::fmin(::fmax(red, 0.0f), 1023.f);
green = ::fmin(::fmax(green, 0.0f), 1023.f);
blue = ::fmin(::fmax(blue, 0.0f), 1023.f);
// Convert to 8 bit unsigned integers per color component
ARGBpixel = (((uint)blue >> 2) |
(((uint)green >> 2) << 8) |
(((uint)red >> 2) << 16) |
(uint)alpha);
return ARGBpixel;
}
// CUDA kernel for producing the final ARGB output from NV12
#define COLOR_COMPONENT_BIT_SIZE 10
#define COLOR_COMPONENT_MASK 0x3FF
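// Packing note: each 32-bit word of yuv101010Pel built in the kernel below carries three
// 10-bit fields - Y in bits 0..9, Cb in bits 10..19, Cr in bits 20..29 - where every
// 8-bit sample has been promoted to 10 bits via << 2, so mid-grey chroma (128) becomes
// 512, the offset subtracted in YUV2RGB above, and RGBAPACK_10bit shifts the clamped
// 10-bit results back down to 8 bits per channel.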
__global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch,
uint* dstImage, size_t nDestPitch,
uint width, uint height)
{
// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height)
return;
// Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
// if we move to texture we could read 4 luminance values
uint yuv101010Pel[2];
yuv101010Pel[0] = (srcImage[y * nSourcePitch + x ]) << 2;
yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;
const size_t chromaOffset = nSourcePitch * height;
const int y_chroma = y >> 1;
if (y & 1) // odd scanline ?
{
uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x ];
uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
{
chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x ] + 1) >> 1;
chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
}
yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
}
else
{
yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2));
yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
}
// this step performs the color conversion
uint yuvi[6];
float red[2], green[2], blue[2];
yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK );
yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK);
yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK );
yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK);
yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
// YUV to RGB Transformation conversion
YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
// Clamp the results to RGBA
const size_t dstImagePitch = nDestPitch >> 2;
dstImage[y * dstImagePitch + x ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
}
void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
interopFrame.cols, interopFrame.rows);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
#endif /* CUDA_DISABLER */
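For context, a hedged host-side sketch of how the two entry points above fit together. The namespace and the loadHueCSC and NV12ToARGB_gpu signatures are taken from this file; the wrapper function name and the matrix coefficients are placeholders, since the real coefficients are computed by the decoder's color-space setup rather than hard-coded:
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device { namespace video_decoding
{
    // declarations matching the definitions above
    void loadHueCSC(float hueCSC[9]);
    void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<unsigned int> interopFrame, cudaStream_t stream);
}}}}
void convertDecodedFrameExample(const cv::gpu::PtrStepb& nv12Frame,
                                cv::gpu::PtrStepSz<unsigned int> argbFrame,
                                cudaStream_t stream)
{
    // Placeholder 3x3 row-major YUV -> RGB matrix (BT.601-like, no hue rotation);
    // the coefficients actually uploaded by the video reader may differ.
    float hueCSC[9] = { 1.1644f,  0.0f,     1.5960f,
                        1.1644f, -0.3918f, -0.8130f,
                        1.1644f,  2.0172f,  0.0f };
    cv::gpu::device::video_decoding::loadHueCSC(hueCSC);
    cv::gpu::device::video_decoding::NV12ToARGB_gpu(nv12Frame, argbFrame, stream);
}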

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,472 +1,472 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"
namespace cv { namespace gpu { namespace device
{
namespace bf_radius_match
{
///////////////////////////////////////////////////////////////////////////////
// Match Unrolled
template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
Dist dist;
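// Tiling note: each loop iteration below stages one BLOCK_SIZE x BLOCK_SIZE tile of
// query descriptors (row-major in s_query) and one tile of train descriptors (stored
// transposed in s_train), then every thread accumulates BLOCK_SIZE partial terms, so
// a single thread ends up holding the full distance between its (queryIdx, trainIdx)
// pair once all tiles have been processed.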
#pragma unroll
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
{
const int loadX = threadIdx.x + i * BLOCK_SIZE;
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
if (loadX < query.cols)
{
T val;
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
}
__syncthreads();
#pragma unroll
for (int j = 0; j < BLOCK_SIZE; ++j)
dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
__syncthreads();
}
float distVal = (typename Dist::result_type)dist;
if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance)
{
unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1);
if (ind < maxCount)
{
bestTrainIdx.ptr(queryIdx)[ind] = trainIdx;
if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx;
bestDistance.ptr(queryIdx)[ind] = distVal;
}
}
#endif
}
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
for (int i = 0; i < n; ++i)
{
const PtrStepSz<T> train = trains[i];
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
if (masks != 0 && masks[i].data)
{
matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
else
{
matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
///////////////////////////////////////////////////////////////////////////////
// Match
template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
__global__ void match(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
Dist dist;
for (int i = 0, endi = (query.cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endi; ++i)
{
const int loadX = threadIdx.x + i * BLOCK_SIZE;
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
if (loadX < query.cols)
{
T val;
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
}
__syncthreads();
#pragma unroll
for (int j = 0; j < BLOCK_SIZE; ++j)
dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
__syncthreads();
}
float distVal = (typename Dist::result_type)dist;
if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance)
{
unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1);
if (ind < maxCount)
{
bestTrainIdx.ptr(queryIdx)[ind] = trainIdx;
if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx;
bestDistance.ptr(queryIdx)[ind] = distVal;
}
}
#endif
}
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int BLOCK_SIZE, typename Dist, typename T>
void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
for (int i = 0; i < n; ++i)
{
const PtrStepSz<T> train = trains[i];
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
if (masks != 0 && masks[i].data)
{
match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
else
{
match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
///////////////////////////////////////////////////////////////////////////////
// Match dispatcher
template <typename Dist, typename T, typename Mask>
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
(void)cc;
if (query.cols <= 64)
{
matchUnrolled<16, 64, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 128)
{
matchUnrolled<16, 128, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}*/
else
{
match<16, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
}
template <typename Dist, typename T>
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
(void)cc;
if (query.cols <= 64)
{
matchUnrolled<16, 64, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 128)
{
matchUnrolled<16, 128, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}*/
else
{
match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
}
///////////////////////////////////////////////////////////////////////////////
// Radius Match caller
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
if (mask.data)
{
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
cc, stream);
}
else
{
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
cc, stream);
}
}
template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
if (mask.data)
{
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
cc, stream);
}
else
{
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
cc, stream);
}
}
//template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
if (mask.data)
{
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
cc, stream);
}
else
{
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
cc, stream);
}
}
template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
trainIdx, imgIdx, distance, nMatches,
cc, stream);
}
template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
trainIdx, imgIdx, distance, nMatches,
cc, stream);
}
//template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
trainIdx, imgIdx, distance, nMatches,
cc, stream);
}
template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
} // namespace bf_radius_match
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
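To make the output layout of these kernels concrete: for every query row q they atomically increment nMatches[q] and append one (train index, distance) pair per accepted neighbour into row q of the result matrices, silently dropping anything beyond trainIdx.cols entries. Below is a hedged host-side sketch of turning downloaded copies of those buffers into cv::DMatch lists; the function name and matrix names are illustrative, not part of this file:
#include <algorithm>
#include <vector>
#include "opencv2/core/core.hpp"
#include "opencv2/features2d/features2d.hpp"
// Illustration only: nMatchesMat (1 x nQuery, 32-bit counts), trainIdxMat and
// distanceMat (nQuery x maxCount each) stand for host downloads of the buffers
// filled by the kernels above.
void collectRadiusMatches(const cv::Mat& nMatchesMat,
                          const cv::Mat& trainIdxMat,
                          const cv::Mat& distanceMat,
                          std::vector< std::vector<cv::DMatch> >& matches)
{
    matches.resize(trainIdxMat.rows);
    for (int q = 0; q < trainIdxMat.rows; ++q)
    {
        // the kernels may bump the counter past maxCount, so clamp before reading
        const int found = std::min(nMatchesMat.at<int>(0, q), trainIdxMat.cols);
        for (int i = 0; i < found; ++i)
            matches[q].push_back(cv::DMatch(q, trainIdxMat.at<int>(q, i), distanceMat.at<float>(q, i)));
    }
}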
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/vec_distance.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"
namespace cv { namespace gpu { namespace device
{
namespace bf_radius_match
{
///////////////////////////////////////////////////////////////////////////////
// Match Unrolled
template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
__global__ void matchUnrolled(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
Dist dist;
#pragma unroll
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
{
const int loadX = threadIdx.x + i * BLOCK_SIZE;
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
if (loadX < query.cols)
{
T val;
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
}
__syncthreads();
#pragma unroll
for (int j = 0; j < BLOCK_SIZE; ++j)
dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
__syncthreads();
}
float distVal = (typename Dist::result_type)dist;
if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance)
{
unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1);
if (ind < maxCount)
{
bestTrainIdx.ptr(queryIdx)[ind] = trainIdx;
if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx;
bestDistance.ptr(queryIdx)[ind] = distVal;
}
}
#endif
}
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
for (int i = 0; i < n; ++i)
{
const PtrStepSz<T> train = trains[i];
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
if (masks != 0 && masks[i].data)
{
matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
else
{
matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
///////////////////////////////////////////////////////////////////////////////
// Match
template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
__global__ void match(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
extern __shared__ int smem[];
const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
Dist dist;
for (int i = 0, endi = (query.cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endi; ++i)
{
const int loadX = threadIdx.x + i * BLOCK_SIZE;
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
if (loadX < query.cols)
{
T val;
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
}
__syncthreads();
#pragma unroll
for (int j = 0; j < BLOCK_SIZE; ++j)
dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
__syncthreads();
}
float distVal = (typename Dist::result_type)dist;
if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx) && distVal < maxDistance)
{
unsigned int ind = atomicInc(nMatches + queryIdx, (unsigned int) -1);
if (ind < maxCount)
{
bestTrainIdx.ptr(queryIdx)[ind] = trainIdx;
if (SAVE_IMG_IDX) bestImgIdx.ptr(queryIdx)[ind] = imgIdx;
bestDistance.ptr(queryIdx)[ind] = distVal;
}
}
#endif
}
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int BLOCK_SIZE, typename Dist, typename T>
void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
cudaStream_t stream)
{
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
for (int i = 0; i < n; ++i)
{
const PtrStepSz<T> train = trains[i];
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
if (masks != 0 && masks[i].data)
{
match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, SingleMask(masks[i]),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
else
{
match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
}
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
///////////////////////////////////////////////////////////////////////////////
// Match dispatcher
template <typename Dist, typename T, typename Mask>
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
(void)cc;
if (query.cols <= 64)
{
matchUnrolled<16, 64, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 128)
{
matchUnrolled<16, 128, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}*/
else
{
match<16, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream);
}
}
template <typename Dist, typename T>
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
(void)cc;
if (query.cols <= 64)
{
matchUnrolled<16, 64, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 128)
{
matchUnrolled<16, 128, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
/*else if (query.cols <= 256)
{
matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 512)
{
matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
else if (query.cols <= 1024)
{
matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}*/
else
{
match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream);
}
}
///////////////////////////////////////////////////////////////////////////////
// Radius Match caller
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
if (mask.data)
{
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
cc, stream);
}
else
{
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
cc, stream);
}
}
template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
if (mask.data)
{
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
cc, stream);
}
else
{
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
cc, stream);
}
}
//template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
if (mask.data)
{
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
trainIdx, distance, nMatches,
cc, stream);
}
else
{
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
trainIdx, distance, nMatches,
cc, stream);
}
}
template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
trainIdx, imgIdx, distance, nMatches,
cc, stream);
}
template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
trainIdx, imgIdx, distance, nMatches,
cc, stream);
}
//template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchL2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
int cc, cudaStream_t stream)
{
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
trainIdx, imgIdx, distance, nMatches,
cc, stream);
}
template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
//template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
template void matchHamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
} // namespace bf_radius_match
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
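For orientation: the kernels above stream BLOCK_SIZE-wide tiles of the query and train descriptors through shared memory and keep every train row whose distance to a query row falls below maxDistance. A minimal CPU-side sketch of that radius test, assuming plain float descriptors and an L2 metric (hypothetical helper, not part of the module above):

#include <cmath>
#include <vector>

struct RadiusHit { int trainIdx; float distance; };

// Hypothetical host reference: collect every train descriptor closer than maxDistance.
static std::vector<RadiusHit> radiusMatchL2(const float* query, const float* train,
                                            int trainRows, int cols, float maxDistance)
{
    std::vector<RadiusHit> hits;
    for (int t = 0; t < trainRows; ++t)
    {
        float sum = 0.f;
        for (int c = 0; c < cols; ++c)
        {
            const float diff = query[c] - train[t * cols + c];
            sum += diff * diff;                      // accumulate squared differences
        }
        const float dist = std::sqrt(sum);           // L2 distance
        if (dist < maxDistance)
            hits.push_back(RadiusHit{t, dist});      // mirrors the distVal < maxDistance test and store
    }
    return hits;
}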

View File

@@ -1,201 +1,201 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
using namespace cv::gpu;
typedef unsigned char uchar;
typedef unsigned short ushort;
//////////////////////////////////////////////////////////////////////////////////
/// Bilateral filtering
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
__device__ __forceinline__ float norm_l1(const float& a) { return ::fabs(a); }
__device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); }
__device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); }
__device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); }
__device__ __forceinline__ float sqr(const float& a) { return a * a; }
template<typename T, typename B>
__global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
if (x >= src.cols || y >= src.rows)
return;
value_type center = saturate_cast<value_type>(src(y, x));
value_type sum1 = VecTraits<value_type>::all(0);
float sum2 = 0;
int r = ksz / 2;
float r2 = (float)(r * r);
int tx = x - r + ksz;
int ty = y - r + ksz;
if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows)
{
for (int cy = y - r; cy < ty; ++cy)
for (int cx = x - r; cx < tx; ++cx)
{
float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
if (space2 > r2)
continue;
value_type value = saturate_cast<value_type>(src(cy, cx));
float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
sum1 = sum1 + weight * value;
sum2 = sum2 + weight;
}
}
else
{
for (int cy = y - r; cy < ty; ++cy)
for (int cx = x - r; cx < tx; ++cx)
{
float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
if (space2 > r2)
continue;
value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));
float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
sum1 = sum1 + weight * value;
sum2 = sum2 + weight;
}
}
dst(y, x) = saturate_cast<T>(sum1 / sum2);
}
template<typename T, template <typename> class B>
void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
{
dim3 block (32, 8);
dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));
B<T> b(src.rows, src.cols);
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
cudaSafeCall ( cudaGetLastError () );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template<typename T>
void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
{
typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);
static caller_t funcs[] =
{
bilateral_caller<T, BrdReflect101>,
bilateral_caller<T, BrdReplicate>,
bilateral_caller<T, BrdConstant>,
bilateral_caller<T, BrdReflect>,
bilateral_caller<T, BrdWrap>,
};
funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
}
}
}}}
#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
template void cv::gpu::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar4)
OCV_INSTANTIATE_BILATERAL_FILTER(short)
//OCV_INSTANTIATE_BILATERAL_FILTER(short2)
OCV_INSTANTIATE_BILATERAL_FILTER(short3)
OCV_INSTANTIATE_BILATERAL_FILTER(short4)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)
//OCV_INSTANTIATE_BILATERAL_FILTER(int)
//OCV_INSTANTIATE_BILATERAL_FILTER(int2)
//OCV_INSTANTIATE_BILATERAL_FILTER(int3)
//OCV_INSTANTIATE_BILATERAL_FILTER(int4)
OCV_INSTANTIATE_BILATERAL_FILTER(float)
//OCV_INSTANTIATE_BILATERAL_FILTER(float2)
OCV_INSTANTIATE_BILATERAL_FILTER(float3)
OCV_INSTANTIATE_BILATERAL_FILTER(float4)
#endif /* CUDA_DISABLER */
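The neighbour weight computed in bilateral_kernel combines a spatial and a colour Gaussian; sigma_spatial2_inv_half and sigma_color2_inv_half are simply -0.5 / sigma^2, precomputed in bilateral_caller. A one-pixel sketch of the same weight, assuming a scalar colour difference (hypothetical helper, for illustration only):

#include <cmath>

// Hypothetical reference for the weight used in bilateral_kernel:
// exp(-d_spatial^2 / (2*sigma_spatial^2) - d_color^2 / (2*sigma_color^2)).
static float bilateralWeight(float dx, float dy, float colorDiff,
                             float sigma_spatial, float sigma_color)
{
    const float space2                  = dx * dx + dy * dy;
    const float sigma_spatial2_inv_half = -0.5f / (sigma_spatial * sigma_spatial);
    const float sigma_color2_inv_half   = -0.5f / (sigma_color  * sigma_color);
    return std::exp(space2 * sigma_spatial2_inv_half
                    + colorDiff * colorDiff * sigma_color2_inv_half);
}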

View File

@@ -1,121 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
namespace blend
{
template <typename T>
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < cols)
{
int x_ = x / cn;
float w1 = weights1.ptr(y)[x_];
float w2 = weights2.ptr(y)[x_];
T p1 = img1.ptr(y)[x];
T p2 = img2.ptr(y)[x];
result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}
}
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);
__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < cols)
{
float w1 = weights1.ptr(y)[x];
float w2 = weights2.ptr(y)[x];
float sum_inv = 1.f / (w1 + w2 + 1e-5f);
w1 *= sum_inv;
w2 *= sum_inv;
uchar4 p1 = ((const uchar4*)img1.ptr(y))[x];
uchar4 p2 = ((const uchar4*)img2.ptr(y))[x];
((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
}
}
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
} // namespace blend
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
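Both blend kernels apply the same per-pixel rule, result = (p1*w1 + p2*w2) / (w1 + w2 + 1e-5); the 8UC4 variant normalises the weights once and then mixes the four channels. A scalar sketch of that rule (hypothetical helper, for illustration only):

// Hypothetical scalar reference for blendLinearKernel / blendLinearKernel8UC4.
static inline float blendLinearPixel(float p1, float p2, float w1, float w2)
{
    // The 1e-5f term matches the kernels and guards against division by zero
    // where both weight maps are zero.
    return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}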

File diff suppressed because it is too large

View File

@@ -1,384 +1,384 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <internal_shared.hpp>
#include <opencv2/gpu/device/transform.hpp>
#include <opencv2/gpu/device/color.hpp>
#include <cvt_colot_internal.h>
namespace cv { namespace gpu { namespace device
{
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_x = 8 };
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
{
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
{
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) \
{ \
traits::functor_type functor = traits::create_functor(); \
typedef typename traits::functor_type::argument_type src_t; \
typedef typename traits::functor_type::result_type dst_t; \
cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
}
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits<ushort>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits<float>)
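// Reading aid (not part of the original file): for a non-template traits class such as
// bgr_to_bgr555_traits, OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555) expands to roughly
// the following host wrapper, which binds the conversion functor to the generic per-element
// transform:
//
//   void bgr_to_bgr555(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
//   {
//       bgr_to_bgr555_traits::functor_type functor = bgr_to_bgr555_traits::create_functor();
//       typedef bgr_to_bgr555_traits::functor_type::argument_type src_t;
//       typedef bgr_to_bgr555_traits::functor_type::result_type dst_t;
//       cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream);
//   }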
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgra)
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <internal_shared.hpp>
#include <opencv2/gpu/device/transform.hpp>
#include <opencv2/gpu/device/color.hpp>
#include <cvt_colot_internal.h>
namespace cv { namespace gpu { namespace device
{
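// The OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS specializations below are launch-tuning hints
// consumed by cv::gpu::device::transform: the smart_block_dim_* values pick the block shape,
// and smart_shift appears to control how many elements each thread handles. The exact
// interpretation lives in the transform implementation; the values here simply override its
// defaults for the listed color-conversion functors.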
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_x = 8 };
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
{
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
{
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) \
{ \
traits::functor_type functor = traits::create_functor(); \
typedef typename traits::functor_type::argument_type src_t; \
typedef typename traits::functor_type::result_type dst_t; \
cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
}
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits<ushort>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits<float>)
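// For reference, a single OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb) invocation below
// expands, per the macros above, into three thin wrappers, one per depth; e.g. for 8-bit:
//
//   void bgr_to_rgb_8u(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
//   {
//       bgr_to_rgb_traits<uchar>::functor_type functor = bgr_to_rgb_traits<uchar>::create_functor();
//       typedef typename bgr_to_rgb_traits<uchar>::functor_type::argument_type src_t;
//       typedef typename bgr_to_rgb_traits<uchar>::functor_type::result_type dst_t;
//       cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream);
//   }
//
// and likewise bgr_to_rgb_16u (ushort) and bgr_to_rgb_32f (float).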
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hsv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(hls4_to_bgra)
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */

View File

@@ -1,388 +1,388 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
{
namespace column_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = KSIZE <= 16 ? 1 : 2;
#else
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 2;
const int HALO_SIZE = 2;
#endif
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];
const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;
if (x >= src.cols)
return;
const T* src_col = src.ptr() + x;
const int yStart = blockIdx.y * (BLOCK_DIM_Y * PATCH_PER_BLOCK) + threadIdx.y;
if (blockIdx.y > 0)
{
//Upper halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x));
}
else
{
//Upper halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
}
if (blockIdx.y + 2 < gridDim.y)
{
//Main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x));
//Lower halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x));
}
else
{
//Main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));
//Lower halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
}
__syncthreads();
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int y = yStart + j * BLOCK_DIM_Y;
if (y < src.rows)
{
sum_t sum = VecTraits<sum_t>::all(0);
#pragma unroll
for (int k = 0; k < KSIZE; ++k)
sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
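// Summary of the staging scheme above: each block stages PATCH_PER_BLOCK * BLOCK_DIM_Y output
// rows plus HALO_SIZE * BLOCK_DIM_Y rows of halo on each side into shared memory. Interior
// blocks load straight from src; blocks touching the top or bottom edge route their loads
// through the border object brd (at_low / at_high), so the convolution loop never reads out
// of bounds.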
template <int KSIZE, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
int PATCH_PER_BLOCK;
if (cc >= 20)
{
BLOCK_DIM_X = 16;
BLOCK_DIM_Y = 16;
PATCH_PER_BLOCK = 4;
}
else
{
BLOCK_DIM_X = 16;
BLOCK_DIM_Y = 8;
PATCH_PER_BLOCK = 2;
}
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));
B<T> brd(src.rows);
linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
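// Note: divUp comes from internal_shared.hpp and is assumed here to be the usual integer
// ceiling division, roughly:
//
//   static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
//
// so the grid covers every column once and every group of BLOCK_DIM_Y * PATCH_PER_BLOCK rows once.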
template <typename T, typename D>
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
{
0,
linearColumnFilter_caller< 1, T, D, BrdColReflect101>,
linearColumnFilter_caller< 2, T, D, BrdColReflect101>,
linearColumnFilter_caller< 3, T, D, BrdColReflect101>,
linearColumnFilter_caller< 4, T, D, BrdColReflect101>,
linearColumnFilter_caller< 5, T, D, BrdColReflect101>,
linearColumnFilter_caller< 6, T, D, BrdColReflect101>,
linearColumnFilter_caller< 7, T, D, BrdColReflect101>,
linearColumnFilter_caller< 8, T, D, BrdColReflect101>,
linearColumnFilter_caller< 9, T, D, BrdColReflect101>,
linearColumnFilter_caller<10, T, D, BrdColReflect101>,
linearColumnFilter_caller<11, T, D, BrdColReflect101>,
linearColumnFilter_caller<12, T, D, BrdColReflect101>,
linearColumnFilter_caller<13, T, D, BrdColReflect101>,
linearColumnFilter_caller<14, T, D, BrdColReflect101>,
linearColumnFilter_caller<15, T, D, BrdColReflect101>,
linearColumnFilter_caller<16, T, D, BrdColReflect101>,
linearColumnFilter_caller<17, T, D, BrdColReflect101>,
linearColumnFilter_caller<18, T, D, BrdColReflect101>,
linearColumnFilter_caller<19, T, D, BrdColReflect101>,
linearColumnFilter_caller<20, T, D, BrdColReflect101>,
linearColumnFilter_caller<21, T, D, BrdColReflect101>,
linearColumnFilter_caller<22, T, D, BrdColReflect101>,
linearColumnFilter_caller<23, T, D, BrdColReflect101>,
linearColumnFilter_caller<24, T, D, BrdColReflect101>,
linearColumnFilter_caller<25, T, D, BrdColReflect101>,
linearColumnFilter_caller<26, T, D, BrdColReflect101>,
linearColumnFilter_caller<27, T, D, BrdColReflect101>,
linearColumnFilter_caller<28, T, D, BrdColReflect101>,
linearColumnFilter_caller<29, T, D, BrdColReflect101>,
linearColumnFilter_caller<30, T, D, BrdColReflect101>,
linearColumnFilter_caller<31, T, D, BrdColReflect101>,
linearColumnFilter_caller<32, T, D, BrdColReflect101>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColReplicate>,
linearColumnFilter_caller< 2, T, D, BrdColReplicate>,
linearColumnFilter_caller< 3, T, D, BrdColReplicate>,
linearColumnFilter_caller< 4, T, D, BrdColReplicate>,
linearColumnFilter_caller< 5, T, D, BrdColReplicate>,
linearColumnFilter_caller< 6, T, D, BrdColReplicate>,
linearColumnFilter_caller< 7, T, D, BrdColReplicate>,
linearColumnFilter_caller< 8, T, D, BrdColReplicate>,
linearColumnFilter_caller< 9, T, D, BrdColReplicate>,
linearColumnFilter_caller<10, T, D, BrdColReplicate>,
linearColumnFilter_caller<11, T, D, BrdColReplicate>,
linearColumnFilter_caller<12, T, D, BrdColReplicate>,
linearColumnFilter_caller<13, T, D, BrdColReplicate>,
linearColumnFilter_caller<14, T, D, BrdColReplicate>,
linearColumnFilter_caller<15, T, D, BrdColReplicate>,
linearColumnFilter_caller<16, T, D, BrdColReplicate>,
linearColumnFilter_caller<17, T, D, BrdColReplicate>,
linearColumnFilter_caller<18, T, D, BrdColReplicate>,
linearColumnFilter_caller<19, T, D, BrdColReplicate>,
linearColumnFilter_caller<20, T, D, BrdColReplicate>,
linearColumnFilter_caller<21, T, D, BrdColReplicate>,
linearColumnFilter_caller<22, T, D, BrdColReplicate>,
linearColumnFilter_caller<23, T, D, BrdColReplicate>,
linearColumnFilter_caller<24, T, D, BrdColReplicate>,
linearColumnFilter_caller<25, T, D, BrdColReplicate>,
linearColumnFilter_caller<26, T, D, BrdColReplicate>,
linearColumnFilter_caller<27, T, D, BrdColReplicate>,
linearColumnFilter_caller<28, T, D, BrdColReplicate>,
linearColumnFilter_caller<29, T, D, BrdColReplicate>,
linearColumnFilter_caller<30, T, D, BrdColReplicate>,
linearColumnFilter_caller<31, T, D, BrdColReplicate>,
linearColumnFilter_caller<32, T, D, BrdColReplicate>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColConstant>,
linearColumnFilter_caller< 2, T, D, BrdColConstant>,
linearColumnFilter_caller< 3, T, D, BrdColConstant>,
linearColumnFilter_caller< 4, T, D, BrdColConstant>,
linearColumnFilter_caller< 5, T, D, BrdColConstant>,
linearColumnFilter_caller< 6, T, D, BrdColConstant>,
linearColumnFilter_caller< 7, T, D, BrdColConstant>,
linearColumnFilter_caller< 8, T, D, BrdColConstant>,
linearColumnFilter_caller< 9, T, D, BrdColConstant>,
linearColumnFilter_caller<10, T, D, BrdColConstant>,
linearColumnFilter_caller<11, T, D, BrdColConstant>,
linearColumnFilter_caller<12, T, D, BrdColConstant>,
linearColumnFilter_caller<13, T, D, BrdColConstant>,
linearColumnFilter_caller<14, T, D, BrdColConstant>,
linearColumnFilter_caller<15, T, D, BrdColConstant>,
linearColumnFilter_caller<16, T, D, BrdColConstant>,
linearColumnFilter_caller<17, T, D, BrdColConstant>,
linearColumnFilter_caller<18, T, D, BrdColConstant>,
linearColumnFilter_caller<19, T, D, BrdColConstant>,
linearColumnFilter_caller<20, T, D, BrdColConstant>,
linearColumnFilter_caller<21, T, D, BrdColConstant>,
linearColumnFilter_caller<22, T, D, BrdColConstant>,
linearColumnFilter_caller<23, T, D, BrdColConstant>,
linearColumnFilter_caller<24, T, D, BrdColConstant>,
linearColumnFilter_caller<25, T, D, BrdColConstant>,
linearColumnFilter_caller<26, T, D, BrdColConstant>,
linearColumnFilter_caller<27, T, D, BrdColConstant>,
linearColumnFilter_caller<28, T, D, BrdColConstant>,
linearColumnFilter_caller<29, T, D, BrdColConstant>,
linearColumnFilter_caller<30, T, D, BrdColConstant>,
linearColumnFilter_caller<31, T, D, BrdColConstant>,
linearColumnFilter_caller<32, T, D, BrdColConstant>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColReflect>,
linearColumnFilter_caller< 2, T, D, BrdColReflect>,
linearColumnFilter_caller< 3, T, D, BrdColReflect>,
linearColumnFilter_caller< 4, T, D, BrdColReflect>,
linearColumnFilter_caller< 5, T, D, BrdColReflect>,
linearColumnFilter_caller< 6, T, D, BrdColReflect>,
linearColumnFilter_caller< 7, T, D, BrdColReflect>,
linearColumnFilter_caller< 8, T, D, BrdColReflect>,
linearColumnFilter_caller< 9, T, D, BrdColReflect>,
linearColumnFilter_caller<10, T, D, BrdColReflect>,
linearColumnFilter_caller<11, T, D, BrdColReflect>,
linearColumnFilter_caller<12, T, D, BrdColReflect>,
linearColumnFilter_caller<13, T, D, BrdColReflect>,
linearColumnFilter_caller<14, T, D, BrdColReflect>,
linearColumnFilter_caller<15, T, D, BrdColReflect>,
linearColumnFilter_caller<16, T, D, BrdColReflect>,
linearColumnFilter_caller<17, T, D, BrdColReflect>,
linearColumnFilter_caller<18, T, D, BrdColReflect>,
linearColumnFilter_caller<19, T, D, BrdColReflect>,
linearColumnFilter_caller<20, T, D, BrdColReflect>,
linearColumnFilter_caller<21, T, D, BrdColReflect>,
linearColumnFilter_caller<22, T, D, BrdColReflect>,
linearColumnFilter_caller<23, T, D, BrdColReflect>,
linearColumnFilter_caller<24, T, D, BrdColReflect>,
linearColumnFilter_caller<25, T, D, BrdColReflect>,
linearColumnFilter_caller<26, T, D, BrdColReflect>,
linearColumnFilter_caller<27, T, D, BrdColReflect>,
linearColumnFilter_caller<28, T, D, BrdColReflect>,
linearColumnFilter_caller<29, T, D, BrdColReflect>,
linearColumnFilter_caller<30, T, D, BrdColReflect>,
linearColumnFilter_caller<31, T, D, BrdColReflect>,
linearColumnFilter_caller<32, T, D, BrdColReflect>
},
{
0,
linearColumnFilter_caller< 1, T, D, BrdColWrap>,
linearColumnFilter_caller< 2, T, D, BrdColWrap>,
linearColumnFilter_caller< 3, T, D, BrdColWrap>,
linearColumnFilter_caller< 4, T, D, BrdColWrap>,
linearColumnFilter_caller< 5, T, D, BrdColWrap>,
linearColumnFilter_caller< 6, T, D, BrdColWrap>,
linearColumnFilter_caller< 7, T, D, BrdColWrap>,
linearColumnFilter_caller< 8, T, D, BrdColWrap>,
linearColumnFilter_caller< 9, T, D, BrdColWrap>,
linearColumnFilter_caller<10, T, D, BrdColWrap>,
linearColumnFilter_caller<11, T, D, BrdColWrap>,
linearColumnFilter_caller<12, T, D, BrdColWrap>,
linearColumnFilter_caller<13, T, D, BrdColWrap>,
linearColumnFilter_caller<14, T, D, BrdColWrap>,
linearColumnFilter_caller<15, T, D, BrdColWrap>,
linearColumnFilter_caller<16, T, D, BrdColWrap>,
linearColumnFilter_caller<17, T, D, BrdColWrap>,
linearColumnFilter_caller<18, T, D, BrdColWrap>,
linearColumnFilter_caller<19, T, D, BrdColWrap>,
linearColumnFilter_caller<20, T, D, BrdColWrap>,
linearColumnFilter_caller<21, T, D, BrdColWrap>,
linearColumnFilter_caller<22, T, D, BrdColWrap>,
linearColumnFilter_caller<23, T, D, BrdColWrap>,
linearColumnFilter_caller<24, T, D, BrdColWrap>,
linearColumnFilter_caller<25, T, D, BrdColWrap>,
linearColumnFilter_caller<26, T, D, BrdColWrap>,
linearColumnFilter_caller<27, T, D, BrdColWrap>,
linearColumnFilter_caller<28, T, D, BrdColWrap>,
linearColumnFilter_caller<29, T, D, BrdColWrap>,
linearColumnFilter_caller<30, T, D, BrdColWrap>,
linearColumnFilter_caller<31, T, D, BrdColWrap>,
linearColumnFilter_caller<32, T, D, BrdColWrap>
}
};
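// callers is indexed as callers[brd_type][ksize]: the first index selects the border policy
// (0 reflect101, 1 replicate, 2 constant, 3 reflect, 4 wrap, matching the rows above) and the
// second is the kernel size, 1..32, with entry 0 left as a null placeholder.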
loadKernel(kernel, ksize, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */

View File

@@ -1,131 +1,131 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz<T> dst, int top, int left)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
dst.ptr(y)[x] = src(y - top, x - left);
}
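// The Ptr2D passed in as src is a BorderReader (see the dispatcher below), so the shifted
// coordinates (y - top, x - left) may be negative or beyond the source image; the selected
// border policy remaps them, which is what fills the new border pixels.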
template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, int top, int left,
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<T> brd(src.rows, src.cols, VecTraits<T>::make(borderValue));
BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);
copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode,
const T* borderValue, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type vec_type;
typedef void (*caller_t)(const PtrStepSz<vec_type>& src, const PtrStepSz<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
static const caller_t callers[5] =
{
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
};
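// borderMode indexes this table directly: 0 reflect101, 1 replicate, 2 constant, 3 reflect,
// 4 wrap, in the order listed above.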
callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
}
template void copyMakeBorder_gpu<uchar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<ushort, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<short, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<float, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz<T> dst, int top, int left)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
dst.ptr(y)[x] = src(y - top, x - left);
}
template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, int top, int left,
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<T> brd(src.rows, src.cols, VecTraits<T>::make(borderValue));
BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);
copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode,
const T* borderValue, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type vec_type;
typedef void (*caller_t)(const PtrStepSz<vec_type>& src, const PtrStepSz<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
static const caller_t callers[5] =
{
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
};
callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
}
template void copyMakeBorder_gpu<uchar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<uchar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<schar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<ushort, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<ushort, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<short, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<short, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<int, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
//template void copyMakeBorder_gpu<float, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
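The dispatcher above indexes its callers[] table directly with borderMode, so the value passed from the host must follow that order: 0 reflect101, 1 replicate, 2 constant, 3 reflect, 4 wrap. A minimal host-side sketch of how the uchar, 3-channel instantiation might be driven follows; the helper name makeBorder8uC3 and the forward declaration are assumptions made for illustration, and d_dst is presumed pre-allocated at (rows + top + bottom) x (cols + left + right).

// Host-side sketch (an assumption, not part of this diff).
#include <cuda_runtime.h>
#include "opencv2/gpu/devmem2d.hpp"   // PtrStepSzb

namespace cv { namespace gpu { namespace device { namespace imgproc
{
    // Mirrors the signature of the explicit instantiations shown above.
    template <typename T, int cn>
    void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst,
                            int top, int left, int borderMode,
                            const T* borderValue, cudaStream_t stream);
}}}}

void makeBorder8uC3(const cv::gpu::PtrStepSzb& d_src, const cv::gpu::PtrStepSzb& d_dst,
                    int top, int left, cudaStream_t stream)
{
    // Index into callers[]: 0 reflect101, 1 replicate, 2 constant, 3 reflect, 4 wrap
    // (same order as the BORDER_*_GPU enum in internal_shared.hpp).
    const int borderMode = 2;                        // constant border
    const unsigned char borderValue[3] = {0, 0, 0};  // black fill, uchar == unsigned char

    cv::gpu::device::imgproc::copyMakeBorder_gpu<unsigned char, 3>(
        d_src, d_dst, top, left, borderMode, borderValue, stream);
}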

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -1,151 +1,151 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
#if !defined CUDA_DISABLER
#include <thrust/sort.h>
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"
namespace cv { namespace gpu { namespace device
{
namespace gfft
{
texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);
__device__ uint g_counter = 0;
template <class Mask> __global__ void findCorners(float threshold, const Mask mask, float2* corners, uint max_count, int rows, int cols)
{
#if __CUDA_ARCH__ >= 110
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask(i, j))
{
float val = tex2D(eigTex, j, i);
if (val > threshold)
{
float maxVal = val;
maxVal = ::fmax(tex2D(eigTex, j - 1, i - 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j , i - 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j + 1, i - 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j - 1, i), maxVal);
maxVal = ::fmax(tex2D(eigTex, j + 1, i), maxVal);
maxVal = ::fmax(tex2D(eigTex, j - 1, i + 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j , i + 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j + 1, i + 1), maxVal);
if (val == maxVal)
{
const uint ind = atomicInc(&g_counter, (uint)(-1));
if (ind < max_count)
corners[ind] = make_float2(j, i);
}
}
}
#endif // __CUDA_ARCH__ >= 110
}
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(uint)) );
bindTexture(&eigTex, eig);
dim3 block(16, 16);
dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));
if (mask.data)
findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
uint count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(uint), cudaMemcpyDeviceToHost) );
return min(count, max_count);
}
class EigGreater
{
public:
__device__ __forceinline__ bool operator()(float2 a, float2 b) const
{
return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
}
};
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
{
bindTexture(&eigTex, eig);
thrust::device_ptr<float2> ptr(corners);
thrust::sort(ptr, ptr + count, EigGreater());
}
} // namespace optical_flow
}}}
#endif /* CUDA_DISABLER */
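A host-side sketch of how the two entry points above are typically chained: detect thresholded 3x3 local maxima of the eigenvalue image, then rank them by response. detectAndRankCorners and the forward declarations are illustrative assumptions; only the findCorners_gpu / sortCorners_gpu signatures come from the code above.

// Host-side sketch (an assumption, not part of this diff).
#include <cuda_runtime.h>
#include "opencv2/gpu/devmem2d.hpp"   // PtrStepSzf, PtrStepSzb

namespace cv { namespace gpu { namespace device { namespace gfft
{
    int  findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask,
                         float2* corners, int max_count);
    void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count);
}}}}

int detectAndRankCorners(cv::gpu::PtrStepSzf d_eig, cv::gpu::PtrStepSzb d_mask,
                         float2* d_corners, int maxCorners, float threshold)
{
    // Keep candidates that exceed the threshold and are 3x3 local maxima;
    // the returned count is already clamped to maxCorners. d_mask.data may be 0
    // to disable masking, matching the dispatch in findCorners_gpu.
    int found = cv::gpu::device::gfft::findCorners_gpu(d_eig, threshold, d_mask,
                                                       d_corners, maxCorners);

    // Order the survivors by decreasing corner response (Thrust sort on device).
    if (found > 0)
        cv::gpu::device::gfft::sortCorners_gpu(d_eig, d_corners, found);

    return found;
}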

View File

@@ -1,224 +1,224 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
namespace cv { namespace gpu { namespace device
{
#define UINT_BITS 32U
//Warps == subhistograms per threadblock
#define WARP_COUNT 6
//Threadblock size
#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
#define HISTOGRAM256_BIN_COUNT 256
//Shared memory per threadblock
#define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)
#define PARTIAL_HISTOGRAM256_COUNT 240
#define MERGE_THREADBLOCK_SIZE 256
#define USE_SMEM_ATOMICS (defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120))
namespace hist
{
#if (!USE_SMEM_ATOMICS)
#define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
__forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
{
uint count;
do
{
count = s_WarpHist[data] & TAG_MASK;
count = threadTag | (count + 1);
s_WarpHist[data] = count;
} while (s_WarpHist[data] != count);
}
#else
#define TAG_MASK 0xFFFFFFFFU
__forceinline__ __device__ void addByte(uint* s_WarpHist, uint data, uint threadTag)
{
atomicAdd(s_WarpHist + data, 1);
}
#endif
__forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
{
uint x = pos_x << 2;
if (x + 0 < cols) addByte(s_WarpHist, (data >> 0) & 0xFFU, tag);
if (x + 1 < cols) addByte(s_WarpHist, (data >> 8) & 0xFFU, tag);
if (x + 2 < cols) addByte(s_WarpHist, (data >> 16) & 0xFFU, tag);
if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
}
__global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
{
//Per-warp subhistogram storage
__shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
//Clear shared memory storage for current threadblock before processing
#pragma unroll
for (uint i = 0; i < (HISTOGRAM256_THREADBLOCK_MEMORY / HISTOGRAM256_THREADBLOCK_SIZE); i++)
s_Hist[threadIdx.x + i * HISTOGRAM256_THREADBLOCK_SIZE] = 0;
//Cycle through the entire data set, update subhistograms for each warp
const uint tag = threadIdx.x << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);
__syncthreads();
const uint colsui = d_Data.step / sizeof(uint);
for(uint pos = blockIdx.x * blockDim.x + threadIdx.x; pos < dataCount; pos += blockDim.x * gridDim.x)
{
uint pos_y = pos / colsui;
uint pos_x = pos % colsui;
uint data = d_Data.ptr(pos_y)[pos_x];
addWord(s_WarpHist, data, tag, pos_x, cols);
}
//Merge per-warp histograms into per-block and write to global memory
__syncthreads();
for(uint bin = threadIdx.x; bin < HISTOGRAM256_BIN_COUNT; bin += HISTOGRAM256_THREADBLOCK_SIZE)
{
uint sum = 0;
for (uint i = 0; i < WARP_COUNT; i++)
sum += s_Hist[bin + i * HISTOGRAM256_BIN_COUNT] & TAG_MASK;
d_PartialHistograms[blockIdx.x * HISTOGRAM256_BIN_COUNT + bin] = sum;
}
}
////////////////////////////////////////////////////////////////////////////////
// Merge histogram256() output
// Run one threadblock per bin; each threadblock adds up the same bin counter
// from every partial histogram. Reads are uncoalesced, but mergeHistogram256
// takes only a fraction of total processing time
////////////////////////////////////////////////////////////////////////////////
__global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
{
uint sum = 0;
#pragma unroll
for (uint i = threadIdx.x; i < PARTIAL_HISTOGRAM256_COUNT; i += MERGE_THREADBLOCK_SIZE)
sum += d_PartialHistograms[blockIdx.x + i * HISTOGRAM256_BIN_COUNT];
__shared__ uint data[MERGE_THREADBLOCK_SIZE];
data[threadIdx.x] = sum;
for (uint stride = MERGE_THREADBLOCK_SIZE / 2; stride > 0; stride >>= 1)
{
__syncthreads();
if(threadIdx.x < stride)
data[threadIdx.x] += data[threadIdx.x + stride];
}
if(threadIdx.x == 0)
d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
}
void histogram256_gpu(PtrStepSzb src, int* hist, uint* buf, cudaStream_t stream)
{
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
PtrStepSz<uint>(src),
buf,
static_cast<uint>(src.rows * src.step / sizeof(uint)),
src.cols);
cudaSafeCall( cudaGetLastError() );
mergeHistogram256<<<HISTOGRAM256_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(buf, hist);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__constant__ int c_lut[256];
__global__ void equalizeHist(const PtrStepSzb src, PtrStepb dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < src.cols && y < src.rows)
{
const uchar val = src.ptr(y)[x];
const int lut = c_lut[val];
dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
}
}
void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
{
dim3 block(16, 16);
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
equalizeHist<<<grid, block, 0, stream>>>(src, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
} // namespace hist
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
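The partial-histogram kernel launches PARTIAL_HISTOGRAM256_COUNT (240) blocks and each writes a full 256-bin histogram, so the intermediate buf passed to histogram256_gpu must hold 240 * 256 unsigned ints. A hedged host-side sketch follows; computeHist256 is a hypothetical helper and the explicit cudaMalloc/cudaFree are only for illustration.

// Host-side sketch (an assumption, not part of this diff).
#include <cuda_runtime.h>
#include "opencv2/gpu/devmem2d.hpp"   // PtrStepSzb

namespace cv { namespace gpu { namespace device { namespace hist
{
    void histogram256_gpu(PtrStepSzb src, int* hist, unsigned int* buf, cudaStream_t stream);
}}}}

void computeHist256(cv::gpu::PtrStepSzb d_src, int* d_hist, cudaStream_t stream)
{
    const int partialCount = 240;   // PARTIAL_HISTOGRAM256_COUNT
    const int binCount     = 256;   // HISTOGRAM256_BIN_COUNT

    unsigned int* d_buf = 0;
    cudaMalloc((void**)&d_buf, partialCount * binCount * sizeof(unsigned int));

    cv::gpu::device::hist::histogram256_gpu(d_src, d_hist, d_buf, stream);

    // histogram256_gpu synchronizes only for the default stream, so wait before
    // releasing the scratch buffer when an asynchronous stream is used.
    if (stream != 0)
        cudaStreamSynchronize(stream);
    cudaFree(d_buf);
}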

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,100 +1,100 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_internal_shared_HPP__
#define __OPENCV_internal_shared_HPP__
#include <cuda_runtime.h>
#include <npp.h>
#include "NPP_staging.hpp"
#include "opencv2/gpu/devmem2d.hpp"
#include "safe_call.hpp"
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu
{
enum
{
BORDER_REFLECT101_GPU = 0,
BORDER_REPLICATE_GPU,
BORDER_CONSTANT_GPU,
BORDER_REFLECT_GPU,
BORDER_WRAP_GPU
};
class NppStreamHandler
{
public:
inline explicit NppStreamHandler(cudaStream_t newStream = 0)
{
oldStream = nppGetStream();
nppSetStream(newStream);
}
inline ~NppStreamHandler()
{
nppSetStream(oldStream);
}
private:
cudaStream_t oldStream;
};
class NppStStreamHandler
{
public:
inline explicit NppStStreamHandler(cudaStream_t newStream = 0)
{
oldStream = nppStSetActiveCUDAstream(newStream);
}
inline ~NppStStreamHandler()
{
nppStSetActiveCUDAstream(oldStream);
}
private:
cudaStream_t oldStream;
};
}}
#endif /* __OPENCV_internal_shared_HPP__ */
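The two handlers above are small RAII guards: the constructor switches the active NPP (or NPP_staging) stream and the destructor restores the previous one, so any NPP primitive called inside the scope runs on the caller's stream. A usage sketch under that assumption; boxFilterOnStream is a hypothetical helper and the box-filter call merely stands in for whatever NPP primitive the module invokes.

// Usage sketch (an assumption, not part of this diff).
#include <npp.h>
#include <cuda_runtime.h>
#include "internal_shared.hpp"   // cv::gpu::NppStreamHandler

void boxFilterOnStream(const Npp8u* d_src, int srcStep,
                       Npp8u* d_dst, int dstStep,
                       NppiSize roi, cudaStream_t stream)
{
    cv::gpu::NppStreamHandler h(stream);   // nppSetStream(stream) for this scope

    NppiSize  maskSize = {3, 3};
    NppiPoint anchor   = {1, 1};
    nppiFilterBox_8u_C1R(d_src, srcStep, d_dst, dstStep, roi, maskSize, anchor);
}   // destructor restores the previously active NPP stream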

File diff suppressed because it is too large Load Diff

View File

@@ -1,217 +1,217 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device
{
namespace mathfunc
{
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
struct Nothing
{
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
{
}
};
struct Magnitude
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
}
};
struct MagnitudeSqr
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
}
};
struct Atan2
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
{
float angle = ::atan2f(y_data, x_data);
angle += (angle < 0) * 2.0 * CV_PI;
dst[y * dst_step + x] = scale * angle;
}
};
template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height)
{
float x_data = xptr[y * x_step + x];
float y_data = yptr[y * y_step + x];
Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
}
}
struct NonEmptyMag
{
static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
{
return mag[y * mag_step + x];
}
};
struct EmptyMag
{
static __device__ __forceinline__ float get(const float*, size_t, int, int)
{
return 1.0f;
}
};
template <typename Mag>
__global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height)
{
float mag_data = Mag::get(mag, mag_step, x, y);
float angle_data = angle[y * angle_step + x];
float sin_a, cos_a;
::sincosf(scale * angle_data, &sin_a, &cos_a);
xptr[y * x_step + x] = mag_data * cos_a;
yptr[y * y_step + x] = mag_data * sin_a;
}
}
template <typename Mag, typename Angle>
void cartToPolar_caller(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(x.cols, threads.x);
grid.y = divUp(x.rows, threads.y);
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] =
{
{
{
cartToPolar_caller<Magnitude, Atan2>,
cartToPolar_caller<Magnitude, Nothing>
},
{
cartToPolar_caller<MagnitudeSqr, Atan2>,
cartToPolar_caller<MagnitudeSqr, Nothing>,
}
},
{
{
cartToPolar_caller<Nothing, Atan2>,
cartToPolar_caller<Nothing, Nothing>
},
{
cartToPolar_caller<Nothing, Atan2>,
cartToPolar_caller<Nothing, Nothing>,
}
}
};
callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
}
template <typename Mag>
void polarToCart_caller(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(mag.cols, threads.x);
grid.y = divUp(mag.rows, threads.y);
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
{
polarToCart_caller<NonEmptyMag>,
polarToCart_caller<EmptyMag>
};
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
}
} // namespace mathfunc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
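For reference, the per-pixel mapping the kernels above implement: magnitude is sqrt(x^2 + y^2) (or x^2 + y^2 for MagnitudeSqr), the angle is atan2(y, x) wrapped into [0, 2*pi) and scaled by 180/pi when degrees are requested, and polarToCart inverts this with sincos of the rescaled angle. A plain CPU sketch of the same mapping, added for clarity; the helper names are illustrative only.

// CPU reference sketch (an assumption, not part of this diff).
#include <cmath>

void cartToPolarRef(float x, float y, bool angleInDegrees,
                    float& mag, float& angle)
{
    const float pi = 3.14159265358979323846f;
    mag   = std::sqrt(x * x + y * y);
    angle = std::atan2(y, x);
    if (angle < 0)
        angle += 2.0f * pi;              // wrap to [0, 2*pi), as in the Atan2 functor
    if (angleInDegrees)
        angle *= 180.0f / pi;            // same scale factor as cartToPolar_caller
}

void polarToCartRef(float mag, float angle, bool angleInDegrees,
                    float& x, float& y)
{
    const float pi = 3.14159265358979323846f;
    if (angleInDegrees)
        angle *= pi / 180.0f;            // same scale factor as polarToCart_caller
    x = mag * std::cos(angle);
    y = mag * std::sin(angle);
}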

File diff suppressed because it is too large Load Diff

View File

@@ -74,12 +74,12 @@ namespace cv { namespace gpu { namespace device
const int i = blockDim.y * blockIdx.y + threadIdx.y;
const int j = blockDim.x * blockIdx.x + threadIdx.x;
if (j >= dst.cols || i >= dst.rows)
return;
int bsize = search_radius + block_radius;
int search_window = 2 * search_radius + 1;
float minus_search_window2_inv = -1.f/(search_window * search_window);
value_type sum1 = VecTraits<value_type>::all(0);
@@ -87,16 +87,16 @@ namespace cv { namespace gpu { namespace device
if (j - bsize >= 0 && j + bsize < dst.cols && i - bsize >= 0 && i + bsize < dst.rows)
{
for(float y = -search_radius; y <= search_radius; ++y)
for(float x = -search_radius; x <= search_radius; ++x)
{
float dist2 = 0;
for(float ty = -block_radius; ty <= block_radius; ++ty)
for(float tx = -block_radius; tx <= block_radius; ++tx)
{
value_type bv = saturate_cast<value_type>(src(i + y + ty, j + x + tx));
value_type av = saturate_cast<value_type>(src(i + ty, j + tx));
dist2 += norm2(av - bv);
}
@@ -119,10 +119,10 @@ namespace cv { namespace gpu { namespace device
for(float tx = -block_radius; tx <= block_radius; ++tx)
{
value_type bv = saturate_cast<value_type>(b.at(i + y + ty, j + x + tx, src));
value_type av = saturate_cast<value_type>(b.at(i + ty, j + tx, src));
dist2 += norm2(av - bv);
}
float w = __expf(dist2 * noise_mult + (x * x + y * y) * minus_search_window2_inv);
sum1 = sum1 + w * saturate_cast<value_type>(b.at(i + y, j + x, src));
@@ -144,9 +144,9 @@ namespace cv { namespace gpu { namespace device
B<T> b(src.rows, src.cols);
int block_window = 2 * block_radius + 1;
float minus_h2_inv = -1.f/(h * h * VecTraits<T>::cn);
float noise_mult = minus_h2_inv/(block_window * block_window);
cudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
nlm_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, search_radius, block_radius, noise_mult);
cudaSafeCall ( cudaGetLastError () );
@@ -160,7 +160,7 @@ namespace cv { namespace gpu { namespace device
{
typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb dst, int search_radius, int block_radius, float h, cudaStream_t stream);
static func_t funcs[] =
{
nlm_caller<T, BrdReflect101>,
nlm_caller<T, BrdReplicate>,
@@ -183,7 +183,7 @@ namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
__device__ __forceinline__ int calcDist(const uchar& a, const uchar& b) { return (a-b)*(a-b); }
__device__ __forceinline__ int calcDist(const uchar2& a, const uchar2& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y); }
__device__ __forceinline__ int calcDist(const uchar3& a, const uchar3& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y) + (a.z-b.z)*(a.z-b.z); }
@@ -193,7 +193,7 @@ namespace cv { namespace gpu { namespace device
enum
{
CTA_SIZE = 128,
TILE_COLS = 128,
TILE_ROWS = 32,
@@ -217,7 +217,7 @@ namespace cv { namespace gpu { namespace device
PtrStep<T> src;
mutable PtrStepi buffer;
__device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{
for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
@@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace device
int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx));
dist_sums[index] += dist;
col_sums(tx + block_radius, index) += dist;
}
#endif
@@ -265,9 +265,9 @@ namespace cv { namespace gpu { namespace device
}
__device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{
for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
{
int y = index / search_window;
int x = index - y * search_window;
@@ -280,8 +280,8 @@ namespace cv { namespace gpu { namespace device
int col_sum = 0;
for (int ty = -block_radius; ty <= block_radius; ++ty)
col_sum += calcDist(src(ay + ty, ax), src(by + ty, bx));
dist_sums[index] += col_sum - col_sums(first, index);
col_sums(first, index) = col_sum;
@@ -298,7 +298,7 @@ namespace cv { namespace gpu { namespace device
T a_down = src(ay + block_radius, ax);
for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
{
int y = index / search_window;
int x = index - y * search_window;
@@ -309,9 +309,9 @@ namespace cv { namespace gpu { namespace device
T b_down = src(by + block_radius, bx);
int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up);
dist_sums[index] += col_sum - col_sums(first, index);
col_sums(first, index) = col_sum;
up_col_sums(j, index) = col_sum;
}
}
@@ -339,14 +339,14 @@ namespace cv { namespace gpu { namespace device
sum = sum + weight * saturate_cast<sum_type>(src(sy + y, sx + x));
}
volatile __shared__ float cta_buffer[CTA_SIZE];
int tid = threadIdx.x;
cta_buffer[tid] = weights_sum;
__syncthreads();
Block::reduce<CTA_SIZE>(cta_buffer, plus());
weights_sum = cta_buffer[0];
__syncthreads();
@@ -356,7 +356,7 @@ namespace cv { namespace gpu { namespace device
{
cta_buffer[tid] = reinterpret_cast<float*>(&sum)[n];
__syncthreads();
Block::reduce<CTA_SIZE>(cta_buffer, plus());
reinterpret_cast<float*>(&sum)[n] = cta_buffer[0];
__syncthreads();
@@ -388,7 +388,7 @@ namespace cv { namespace gpu { namespace device
for (int i = tby; i < tey; ++i)
for (int j = tbx; j < tex; ++j)
{
__syncthreads();
if (j == tbx)
@@ -397,7 +397,7 @@ namespace cv { namespace gpu { namespace device
first = 0;
}
else
{
if (i == tby)
shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums);
else
@@ -407,7 +407,7 @@ namespace cv { namespace gpu { namespace device
}
__syncthreads();
convolve_window(i, j, dist_sums, col_sums, up_col_sums, dst(i, j));
}
}
@@ -418,7 +418,7 @@ namespace cv { namespace gpu { namespace device
__global__ void fast_nlm_kernel(const FastNonLocalMenas<T> fnlm, PtrStepSz<T> dst) { fnlm(dst); }
void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows)
{
typedef FastNonLocalMenas<uchar> FNLM;
dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));
@@ -434,13 +434,13 @@ namespace cv { namespace gpu { namespace device
FNLM fnlm(search_window, block_window, h);
fnlm.src = (PtrStepSz<T>)src;
fnlm.buffer = buffer;
dim3 block(FNLM::CTA_SIZE, 1);
dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS));
int smem = search_window * search_window * sizeof(int);
fast_nlm_kernel<<<grid, block, smem>>>(fnlm, (PtrStepSz<T>)dst);
cudaSafeCall ( cudaGetLastError () );
if (stream == 0)
@@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace device
template void nlm_fast_gpu<uchar2>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
template void nlm_fast_gpu<uchar3>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
__global__ void fnlm_split_kernel(const PtrStepSz<uchar3> lab, PtrStepb l, PtrStep<uchar2> ab)
{
@@ -462,10 +462,10 @@ namespace cv { namespace gpu { namespace device
{
uchar3 p = lab(y, x);
ab(y,x) = make_uchar2(p.y, p.z);
l(y,x) = p.x;
}
}
void fnlm_split_channels(const PtrStepSz<uchar3>& lab, PtrStepb l, PtrStep<uchar2> ab, cudaStream_t stream)
{
dim3 b(32, 8);
@@ -485,7 +485,7 @@ namespace cv { namespace gpu { namespace device
if (x < lab.cols && y < lab.rows)
{
uchar2 p = ab(y, x);
lab(y, x) = make_uchar3(l(y, x), p.x, p.y);
}
}
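For reference, a minimal host-side sketch of the channel handling performed by fnlm_split_kernel and the merge kernel above: the packed Lab image is split into an L plane and an interleaved ab plane, then merged back the same way. Plain C++ with hypothetical row-major buffers; not part of this commit.

#include <cstddef>
#include <cstdint>
#include <vector>

// CPU reference for the split/merge pair above: lab holds rows*cols*3 bytes,
// l holds rows*cols bytes, ab holds rows*cols*2 bytes (assumed row-major layout).
inline void fnlm_split_cpu(const std::vector<uint8_t>& lab, size_t rows, size_t cols,
                           std::vector<uint8_t>& l, std::vector<uint8_t>& ab)
{
    l.resize(rows * cols);
    ab.resize(rows * cols * 2);
    for (size_t i = 0; i < rows * cols; ++i)
    {
        l[i]          = lab[3 * i + 0];   // L plane
        ab[2 * i + 0] = lab[3 * i + 1];   // a
        ab[2 * i + 1] = lab[3 * i + 2];   // b
    }
}

inline void fnlm_merge_cpu(const std::vector<uint8_t>& l, const std::vector<uint8_t>& ab,
                           size_t rows, size_t cols, std::vector<uint8_t>& lab)
{
    lab.resize(rows * cols * 3);
    for (size_t i = 0; i < rows * cols; ++i)
    {
        lab[3 * i + 0] = l[i];
        lab[3 * i + 1] = ab[2 * i + 0];
        lab[3 * i + 2] = ab[2 * i + 1];
    }
}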


@@ -1,220 +1,220 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/gpu/device/common.hpp"
namespace cv { namespace gpu { namespace device
{
namespace optical_flow
{
#define NEEDLE_MAP_SCALE 16
#define NUM_VERTS_PER_ARROW 6
__global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
{
__shared__ float smem[2 * NEEDLE_MAP_SCALE];
volatile float* u_col_sum = smem;
volatile float* v_col_sum = u_col_sum + NEEDLE_MAP_SCALE;
const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x;
const int y = blockIdx.y * NEEDLE_MAP_SCALE;
u_col_sum[threadIdx.x] = 0;
v_col_sum[threadIdx.x] = 0;
#pragma unroll
for(int i = 0; i < NEEDLE_MAP_SCALE; ++i)
{
u_col_sum[threadIdx.x] += u(::min(y + i, u.rows - 1), x);
v_col_sum[threadIdx.x] += v(::min(y + i, u.rows - 1), x);
}
if (threadIdx.x < 8)
{
// now add the column sums (tree reduction over the 16 columns; the block is a single warp)
const uint X = threadIdx.x;
u_col_sum[X] += u_col_sum[X + 8];
v_col_sum[X] += v_col_sum[X + 8];
if (X < 4)
{
u_col_sum[X] += u_col_sum[X + 4];
v_col_sum[X] += v_col_sum[X + 4];
}
if (X < 2)
{
u_col_sum[X] += u_col_sum[X + 2];
v_col_sum[X] += v_col_sum[X + 2];
}
if (X == 0)
{
u_col_sum[X] += u_col_sum[X + 1];
v_col_sum[X] += v_col_sum[X + 1];
}
}
if (threadIdx.x == 0)
{
const float coeff = 1.0f / (NEEDLE_MAP_SCALE * NEEDLE_MAP_SCALE);
u_col_sum[0] *= coeff;
v_col_sum[0] *= coeff;
u_avg(blockIdx.y, blockIdx.x) = u_col_sum[0];
v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0];
}
}
void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg)
{
const dim3 block(NEEDLE_MAP_SCALE);
const dim3 grid(u_avg.cols, u_avg.rows);
NeedleMapAverageKernel<<<grid, block>>>(u, v, u_avg, v_avg);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
{
// test - just draw a triangle at each pixel
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const float arrow_x = x * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;
const float arrow_y = y * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f;
float3 v[NUM_VERTS_PER_ARROW];
if (x < u_avg.cols && y < u_avg.rows)
{
const float u_avg_val = u_avg(y, x);
const float v_avg_val = v_avg(y, x);
const float theta = ::atan2f(v_avg_val, u_avg_val);// + CV_PI;
float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val);
r = fmin(14.0f * (r / max_flow), 14.0f);
v[0].z = 1.0f;
v[1].z = 0.7f;
v[2].z = 0.7f;
v[3].z = 0.7f;
v[4].z = 0.7f;
v[5].z = 1.0f;
v[0].x = arrow_x;
v[0].y = arrow_y;
v[5].x = arrow_x;
v[5].y = arrow_y;
v[2].x = arrow_x + r * ::cosf(theta);
v[2].y = arrow_y + r * ::sinf(theta);
v[3].x = v[2].x;
v[3].y = v[2].y;
r = ::fmin(r, 2.5f);
v[1].x = arrow_x + r * ::cosf(theta - CV_PI / 2.0f);
v[1].y = arrow_y + r * ::sinf(theta - CV_PI / 2.0f);
v[4].x = arrow_x + r * ::cosf(theta + CV_PI / 2.0f);
v[4].y = arrow_y + r * ::sinf(theta + CV_PI / 2.0f);
int indx = (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3;
color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
vertex_data[indx++] = v[0].x * xscale;
vertex_data[indx++] = v[0].y * yscale;
vertex_data[indx++] = v[0].z;
color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
vertex_data[indx++] = v[1].x * xscale;
vertex_data[indx++] = v[1].y * yscale;
vertex_data[indx++] = v[1].z;
color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
vertex_data[indx++] = v[2].x * xscale;
vertex_data[indx++] = v[2].y * yscale;
vertex_data[indx++] = v[2].z;
color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
vertex_data[indx++] = v[3].x * xscale;
vertex_data[indx++] = v[3].y * yscale;
vertex_data[indx++] = v[3].z;
color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
vertex_data[indx++] = v[4].x * xscale;
vertex_data[indx++] = v[4].y * yscale;
vertex_data[indx++] = v[4].z;
color_data[indx] = (theta - CV_PI) / CV_PI * 180.0f;
vertex_data[indx++] = v[5].x * xscale;
vertex_data[indx++] = v[5].y * yscale;
vertex_data[indx++] = v[5].z;
}
}
void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
{
const dim3 block(16);
const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));
NeedleMapVertexKernel<<<grid, block>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
#endif /* CUDA_DISABLER */
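For reference, a minimal CPU sketch of what NeedleMapAverageKernel computes: each NEEDLE_MAP_SCALE x NEEDLE_MAP_SCALE (16x16) tile of the flow field is reduced to one averaged (u, v) vector, with reads clamped at the bottom edge as in the kernel. Plain C++, row-major buffers assumed; not part of this commit.

#include <algorithm>
#include <vector>

// CPU reference for NeedleMapAverageKernel. u and v hold rows*cols floats,
// u_avg and v_avg receive one value per scale x scale tile (hypothetical layout).
inline void needle_map_average_cpu(const std::vector<float>& u, const std::vector<float>& v,
                                   int rows, int cols, int scale,
                                   std::vector<float>& u_avg, std::vector<float>& v_avg)
{
    const int out_rows = (rows + scale - 1) / scale;
    const int out_cols = (cols + scale - 1) / scale;
    u_avg.assign(out_rows * out_cols, 0.0f);
    v_avg.assign(out_rows * out_cols, 0.0f);
    const float coeff = 1.0f / (scale * scale);

    for (int by = 0; by < out_rows; ++by)
        for (int bx = 0; bx < out_cols; ++bx)
        {
            float su = 0.0f, sv = 0.0f;
            for (int dy = 0; dy < scale; ++dy)
                for (int dx = 0; dx < scale; ++dx)
                {
                    const int y = std::min(by * scale + dy, rows - 1); // row clamp, as in the kernel
                    const int x = std::min(bx * scale + dx, cols - 1); // column clamp added for safety
                    su += u[y * cols + x];
                    sv += v[y * cols + x];
                }
            u_avg[by * out_cols + bx] = su * coeff;
            v_avg[by * out_cols + bx] = sv * coeff;
        }
}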


@@ -1,422 +1,422 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
//
// The original code was written by Paul Furgale and Chi Hay Tong
// and later optimized and prepared for integration into OpenCV by Itseez.
//
//M*/
#if !defined CUDA_DISABLER
#include <thrust/sort.h>
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace orb
{
////////////////////////////////////////////////////////////////////////////////////////////////////////
// cull
int cull_gpu(int* loc, float* response, int size, int n_points)
{
thrust::device_ptr<int> loc_ptr(loc);
thrust::device_ptr<float> response_ptr(response);
thrust::sort_by_key(response_ptr, response_ptr + size, loc_ptr, thrust::greater<float>());
return n_points;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
// HarrisResponses
__global__ void HarrisResponses(const PtrStepb img, const short2* loc_, float* response, const int npoints, const int blockSize, const float harris_k)
{
__shared__ int smem[8 * 32];
volatile int* srow = smem + threadIdx.y * blockDim.x;
const int ptidx = blockIdx.x * blockDim.y + threadIdx.y;
if (ptidx < npoints)
{
const short2 loc = loc_[ptidx];
const int r = blockSize / 2;
const int x0 = loc.x - r;
const int y0 = loc.y - r;
int a = 0, b = 0, c = 0;
for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x)
{
const int i = ind / blockSize;
const int j = ind % blockSize;
int Ix = (img(y0 + i, x0 + j + 1) - img(y0 + i, x0 + j - 1)) * 2 +
(img(y0 + i - 1, x0 + j + 1) - img(y0 + i - 1, x0 + j - 1)) +
(img(y0 + i + 1, x0 + j + 1) - img(y0 + i + 1, x0 + j - 1));
int Iy = (img(y0 + i + 1, x0 + j) - img(y0 + i - 1, x0 + j)) * 2 +
(img(y0 + i + 1, x0 + j - 1) - img(y0 + i - 1, x0 + j - 1)) +
(img(y0 + i + 1, x0 + j + 1) - img(y0 + i - 1, x0 + j + 1));
a += Ix * Ix;
b += Iy * Iy;
c += Ix * Iy;
}
reduce<32>(srow, a, threadIdx.x, plus<volatile int>());
reduce<32>(srow, b, threadIdx.x, plus<volatile int>());
reduce<32>(srow, c, threadIdx.x, plus<volatile int>());
if (threadIdx.x == 0)
{
float scale = (1 << 2) * blockSize * 255.0f;
scale = 1.0f / scale;
const float scale_sq_sq = scale * scale * scale * scale;
response[ptidx] = ((float)a * b - (float)c * c - harris_k * ((float)a + b) * ((float)a + b)) * scale_sq_sq;
}
}
}
void HarrisResponses_gpu(PtrStepSzb img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid;
grid.x = divUp(npoints, block.y);
HarrisResponses<<<grid, block, 0, stream>>>(img, loc, response, npoints, blockSize, harris_k);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
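// --- Editor's reference sketch (not part of the original file) -----------------
// CPU version of the per-keypoint score computed by HarrisResponses above:
// accumulate a = sum(Ix*Ix), b = sum(Iy*Iy), c = sum(Ix*Iy) over a
// blockSize x blockSize patch around the keypoint, then
// R = (a*b - c*c - k*(a + b)^2) * scale^4 with scale = 1 / (4 * blockSize * 255).
// Img is a hypothetical accessor with int operator()(int y, int x).
template <typename Img>
float harrisResponseReference(const Img& img, int cx, int cy, int blockSize, float harris_k)
{
    const int r = blockSize / 2;
    int a = 0, b = 0, c = 0;
    for (int i = 0; i < blockSize; ++i)
        for (int j = 0; j < blockSize; ++j)
        {
            const int y = cy - r + i;
            const int x = cx - r + j;
            const int Ix = (img(y, x + 1) - img(y, x - 1)) * 2 +
                           (img(y - 1, x + 1) - img(y - 1, x - 1)) +
                           (img(y + 1, x + 1) - img(y + 1, x - 1));
            const int Iy = (img(y + 1, x) - img(y - 1, x)) * 2 +
                           (img(y + 1, x - 1) - img(y - 1, x - 1)) +
                           (img(y + 1, x + 1) - img(y - 1, x + 1));
            a += Ix * Ix;
            b += Iy * Iy;
            c += Ix * Iy;
        }
    float scale = 1.0f / ((1 << 2) * blockSize * 255.0f);
    const float scale_sq_sq = scale * scale * scale * scale;
    return ((float)a * b - (float)c * c - harris_k * ((float)a + b) * ((float)a + b)) * scale_sq_sq;
}
// --------------------------------------------------------------------------------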
////////////////////////////////////////////////////////////////////////////////////////////////////////
// IC_Angle
__constant__ int c_u_max[32];
void loadUMax(const int* u_max, int count)
{
cudaSafeCall( cudaMemcpyToSymbol(c_u_max, u_max, count * sizeof(int)) );
}
__global__ void IC_Angle(const PtrStepb image, const short2* loc_, float* angle, const int npoints, const int half_k)
{
__shared__ int smem[8 * 32];
volatile int* srow = smem + threadIdx.y * blockDim.x;
const int ptidx = blockIdx.x * blockDim.y + threadIdx.y;
if (ptidx < npoints)
{
int m_01 = 0, m_10 = 0;
const short2 loc = loc_[ptidx];
// Treat the center line differently, v=0
for (int u = threadIdx.x - half_k; u <= half_k; u += blockDim.x)
m_10 += u * image(loc.y, loc.x + u);
reduce<32>(srow, m_10, threadIdx.x, plus<volatile int>());
for (int v = 1; v <= half_k; ++v)
{
// Proceed over the two lines
int v_sum = 0;
int m_sum = 0;
const int d = c_u_max[v];
for (int u = threadIdx.x - d; u <= d; u += blockDim.x)
{
int val_plus = image(loc.y + v, loc.x + u);
int val_minus = image(loc.y - v, loc.x + u);
v_sum += (val_plus - val_minus);
m_sum += u * (val_plus + val_minus);
}
reduce<32>(srow, v_sum, threadIdx.x, plus<volatile int>());
reduce<32>(srow, m_sum, threadIdx.x, plus<volatile int>());
m_10 += m_sum;
m_01 += v * v_sum;
}
if (threadIdx.x == 0)
{
float kp_dir = ::atan2f((float)m_01, (float)m_10);
kp_dir += (kp_dir < 0) * (2.0f * CV_PI);
kp_dir *= 180.0f / CV_PI;
angle[ptidx] = kp_dir;
}
}
}
void IC_Angle_gpu(PtrStepSzb image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid;
grid.x = divUp(npoints, block.y);
IC_Angle<<<grid, block, 0, stream>>>(image, loc, angle, npoints, half_k);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
// computeOrbDescriptor
template <int WTA_K> struct OrbDescriptor;
#define GET_VALUE(idx) \
img(loc.y + __float2int_rn(pattern_x[idx] * sina + pattern_y[idx] * cosa), \
loc.x + __float2int_rn(pattern_x[idx] * cosa - pattern_y[idx] * sina))
template <> struct OrbDescriptor<2>
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 16 * i;
pattern_y += 16 * i;
int t0, t1, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
val = t0 < t1;
t0 = GET_VALUE(2); t1 = GET_VALUE(3);
val |= (t0 < t1) << 1;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
val |= (t0 < t1) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7);
val |= (t0 < t1) << 3;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
val |= (t0 < t1) << 4;
t0 = GET_VALUE(10); t1 = GET_VALUE(11);
val |= (t0 < t1) << 5;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
val |= (t0 < t1) << 6;
t0 = GET_VALUE(14); t1 = GET_VALUE(15);
val |= (t0 < t1) << 7;
return val;
}
};
template <> struct OrbDescriptor<3>
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 12 * i;
pattern_y += 12 * i;
int t0, t1, t2, val;
t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
return val;
}
};
template <> struct OrbDescriptor<4>
{
__device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
{
pattern_x += 16 * i;
pattern_y += 16 * i;
int t0, t1, t2, t3, k, val;
int a, b;
t0 = GET_VALUE(0); t1 = GET_VALUE(1);
t2 = GET_VALUE(2); t3 = GET_VALUE(3);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val = k;
t0 = GET_VALUE(4); t1 = GET_VALUE(5);
t2 = GET_VALUE(6); t3 = GET_VALUE(7);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 2;
t0 = GET_VALUE(8); t1 = GET_VALUE(9);
t2 = GET_VALUE(10); t3 = GET_VALUE(11);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 4;
t0 = GET_VALUE(12); t1 = GET_VALUE(13);
t2 = GET_VALUE(14); t3 = GET_VALUE(15);
a = 0, b = 2;
if( t1 > t0 ) t0 = t1, a = 1;
if( t3 > t2 ) t2 = t3, b = 3;
k = t0 > t2 ? a : b;
val |= k << 6;
return val;
}
};
#undef GET_VALUE
template <int WTA_K>
__global__ void computeOrbDescriptor(const PtrStepb img, const short2* loc, const float* angle_, const int npoints,
const int* pattern_x, const int* pattern_y, PtrStepb desc, int dsize)
{
const int descidx = blockIdx.x * blockDim.x + threadIdx.x;
const int ptidx = blockIdx.y * blockDim.y + threadIdx.y;
if (ptidx < npoints && descidx < dsize)
{
float angle = angle_[ptidx];
angle *= (float)(CV_PI / 180.f);
float sina, cosa;
::sincosf(angle, &sina, &cosa);
desc.ptr(ptidx)[descidx] = OrbDescriptor<WTA_K>::calc(img, loc[ptidx], pattern_x, pattern_y, sina, cosa, descidx);
}
}
void computeOrbDescriptor_gpu(PtrStepb img, const short2* loc, const float* angle, const int npoints,
const int* pattern_x, const int* pattern_y, PtrStepb desc, int dsize, int WTA_K, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid;
grid.x = divUp(dsize, block.x);
grid.y = divUp(npoints, block.y);
switch (WTA_K)
{
case 2:
computeOrbDescriptor<2><<<grid, block, 0, stream>>>(img, loc, angle, npoints, pattern_x, pattern_y, desc, dsize);
break;
case 3:
computeOrbDescriptor<3><<<grid, block, 0, stream>>>(img, loc, angle, npoints, pattern_x, pattern_y, desc, dsize);
break;
case 4:
computeOrbDescriptor<4><<<grid, block, 0, stream>>>(img, loc, angle, npoints, pattern_x, pattern_y, desc, dsize);
break;
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
// mergeLocation
__global__ void mergeLocation(const short2* loc_, float* x, float* y, const int npoints, float scale)
{
const int ptidx = blockIdx.x * blockDim.x + threadIdx.x;
if (ptidx < npoints)
{
short2 loc = loc_[ptidx];
x[ptidx] = loc.x * scale;
y[ptidx] = loc.y * scale;
}
}
void mergeLocation_gpu(const short2* loc, float* x, float* y, int npoints, float scale, cudaStream_t stream)
{
dim3 block(256);
dim3 grid;
grid.x = divUp(npoints, block.x);
mergeLocation<<<grid, block, 0, stream>>>(loc, x, y, npoints, scale);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
#endif /* CUDA_DISABLER */
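For reference, a minimal CPU sketch of the intensity-centroid orientation that IC_Angle assigns to each keypoint: accumulate the patch moments m_10 and m_01 over the circular patch described by c_u_max, then convert atan2(m_01, m_10) to degrees in [0, 360). Plain C++ with a hypothetical image accessor; not part of this commit.

#include <cmath>
#include <vector>

// CPU reference for IC_Angle. img(y, x) is a hypothetical intensity accessor;
// u_max[v] plays the role of c_u_max (patch half-width at vertical offset v).
template <typename Img>
float icAngleReference(const Img& img, int cx, int cy, const std::vector<int>& u_max, int half_k)
{
    const float pi = 3.14159265358979f;
    int m_01 = 0, m_10 = 0;

    for (int u = -half_k; u <= half_k; ++u)        // center line, v = 0
        m_10 += u * img(cy, cx + u);

    for (int v = 1; v <= half_k; ++v)              // symmetric line pairs
    {
        const int d = u_max[v];
        for (int u = -d; u <= d; ++u)
        {
            const int val_plus  = img(cy + v, cx + u);
            const int val_minus = img(cy - v, cx + u);
            m_10 += u * (val_plus + val_minus);
            m_01 += v * (val_plus - val_minus);
        }
    }

    float kp_dir = std::atan2((float)m_01, (float)m_10);
    if (kp_dir < 0)
        kp_dir += 2.0f * pi;
    return kp_dir * 180.0f / pi;                   // degrees in [0, 360)
}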


@@ -1,228 +1,228 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t;
__shared__ work_t smem[256 + 4];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y;
const int src_y = 2 * y;
if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2)
{
{
work_t sum;
sum = 0.0625f * src(src_y - 2, x);
sum = sum + 0.25f * src(src_y - 1, x);
sum = sum + 0.375f * src(src_y , x);
sum = sum + 0.25f * src(src_y + 1, x);
sum = sum + 0.0625f * src(src_y + 2, x);
smem[2 + threadIdx.x] = sum;
}
if (threadIdx.x < 2)
{
const int left_x = x - 2;
work_t sum;
sum = 0.0625f * src(src_y - 2, left_x);
sum = sum + 0.25f * src(src_y - 1, left_x);
sum = sum + 0.375f * src(src_y , left_x);
sum = sum + 0.25f * src(src_y + 1, left_x);
sum = sum + 0.0625f * src(src_y + 2, left_x);
smem[threadIdx.x] = sum;
}
if (threadIdx.x > 253)
{
const int right_x = x + 2;
work_t sum;
sum = 0.0625f * src(src_y - 2, right_x);
sum = sum + 0.25f * src(src_y - 1, right_x);
sum = sum + 0.375f * src(src_y , right_x);
sum = sum + 0.25f * src(src_y + 1, right_x);
sum = sum + 0.0625f * src(src_y + 2, right_x);
smem[4 + threadIdx.x] = sum;
}
}
else
{
{
work_t sum;
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x));
sum = sum + 0.375f * src(src_y , b.idx_col_high(x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x));
smem[2 + threadIdx.x] = sum;
}
if (threadIdx.x < 2)
{
const int left_x = x - 2;
work_t sum;
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x));
sum = sum + 0.375f * src(src_y , b.idx_col(left_x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x));
smem[threadIdx.x] = sum;
}
if (threadIdx.x > 253)
{
const int right_x = x + 2;
work_t sum;
sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x));
sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x));
sum = sum + 0.375f * src(src_y , b.idx_col_high(right_x));
sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x));
sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x));
smem[4 + threadIdx.x] = sum;
}
}
__syncthreads();
if (threadIdx.x < 128)
{
const int tid2 = threadIdx.x * 2;
work_t sum;
sum = 0.0625f * smem[2 + tid2 - 2];
sum = sum + 0.25f * smem[2 + tid2 - 1];
sum = sum + 0.375f * smem[2 + tid2 ];
sum = sum + 0.25f * smem[2 + tid2 + 1];
sum = sum + 0.0625f * smem[2 + tid2 + 2];
const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
if (dst_x < dst_cols)
dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
}
}
template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows);
B<T> b(src.rows, src.cols);
pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
{
pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
}
template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
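For reference, a minimal CPU sketch of pyrDown on a single-channel float image: a separable 5-tap filter with weights 1/16, 4/16, 6/16, 4/16, 1/16, followed by dropping every other row and column. Borders are simply clamped here, whereas the kernel above uses the BrdReflect101 policy; not part of this commit.

#include <algorithm>
#include <vector>

// CPU reference for pyrDown (single channel, float). Vertical 5-tap pass,
// horizontal 5-tap pass, then 2x decimation. Clamped borders (simplification).
inline void pyr_down_cpu(const std::vector<float>& src, int rows, int cols, std::vector<float>& dst)
{
    const float w[5] = { 0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f };
    const int drows = (rows + 1) / 2;
    const int dcols = (cols + 1) / 2;
    std::vector<float> tmp(rows * cols);

    for (int y = 0; y < rows; ++y)                 // vertical pass
        for (int x = 0; x < cols; ++x)
        {
            float sum = 0.0f;
            for (int k = -2; k <= 2; ++k)
            {
                const int yy = std::min(std::max(y + k, 0), rows - 1);
                sum += w[k + 2] * src[yy * cols + x];
            }
            tmp[y * cols + x] = sum;
        }

    dst.assign(drows * dcols, 0.0f);
    for (int y = 0; y < drows; ++y)                // horizontal pass + decimation
        for (int x = 0; x < dcols; ++x)
        {
            float sum = 0.0f;
            for (int k = -2; k <= 2; ++k)
            {
                const int xx = std::min(std::max(2 * x + k, 0), cols - 1);
                sum += w[k + 2] * tmp[(2 * y) * cols + xx];
            }
            dst[y * dcols + x] = sum;
        }
}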


@@ -1,196 +1,196 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
__shared__ sum_t s_srcPatch[10][10];
__shared__ sum_t s_dstPatch[20][16];
if (threadIdx.x < 10 && threadIdx.y < 10)
{
int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
srcx = ::abs(srcx);
srcx = ::min(src.cols - 1, srcx);
srcy = ::abs(srcy);
srcy = ::min(src.rows - 1, srcy);
s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
}
__syncthreads();
sum_t sum = VecTraits<sum_t>::all(0);
const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0);
const bool eveny = ((threadIdx.y & 1) == 0);
const int tidx = threadIdx.x;
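// Horizontal pass of the 5-tap binomial kernel (1 4 6 4 1) / 16: threads in even
// columns pick up the 0.0625 and 0.375 taps, threads in odd columns the 0.25 taps,
// selected branchlessly through evenFlag / oddFlag.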
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum;
if (threadIdx.y < 2)
{
sum = VecTraits<sum_t>::all(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[threadIdx.y][threadIdx.x] = sum;
}
if (threadIdx.y > 13)
{
sum = VecTraits<sum_t>::all(0);
if (eveny)
{
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum;
}
__syncthreads();
sum = VecTraits<sum_t>::all(0);
const int tidy = threadIdx.y;
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x];
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x];
sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x];
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x];
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x];
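// Scale by 4 to compensate for the zero samples introduced by the 2x upsampling,
// so the upsampled image keeps the brightness of the source (standard pyrUp behaviour).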
if (x < dst.cols && y < dst.rows)
dst(y, x) = saturate_cast<T>(4.0f * sum);
}
template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
pyrUp<<<grid, block, 0, stream>>>(src, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
{
pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
}
template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
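For orientation, here is a minimal host-side sketch of how these instantiations are normally reached through the public GPU module. It is only a sketch: it assumes the 2.4-era cv::gpu API (GpuMat, pyrDown, pyrUp); header names and exact signatures may differ between releases.

#include <opencv2/gpu/gpu.hpp>

// One pyramid round trip: downsample a level, then upsample it back.
// For a CV_8UC3 input this would dispatch to the uchar3 instantiations above.
void pyramid_round_trip(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& restored)
{
    cv::gpu::GpuMat half;
    cv::gpu::pyrDown(src, half);     // half has roughly half the rows and columns of src
    cv::gpu::pyrUp(half, restored);  // restored is roughly src-sized, low-pass filtered
}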

File diff suppressed because it is too large Load Diff


@@ -1,274 +1,274 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
{
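// One thread per destination pixel: read its floating-point source coordinates from
// mapx / mapy and sample the source through the Ptr2D wrapper, which already combines
// the interpolation filter with the border handling.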
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float xcoo = mapx.ptr(y)[x];
const float ycoo = mapy.ptr(y)[x];
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
}
}
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, int)
{
(void)srcWhole;
(void)xoff;
(void)yoff;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
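// The macro below specializes RemapDispatcherNonStream for primitive element types so the
// source is read through a 2D texture reference bound to the whole image; the BrdReplicate
// specialization additionally skips the border reader when the ROI covers the entire image.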
#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type) \
texture< type , cudaTextureType2D> tex_remap_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_remap_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff, yoff; \
tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
} \
}; \
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
PtrStepSz< type > dst, const float* borderValue, int cc) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(32, cc >= 20 ? 8 : 4); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_remap_ ## type , srcWhole); \
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
PtrStepSz< type > dst, const float*, int) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_remap_ ## type , srcWhole); \
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
} \
else \
{ \
BrdReplicate<type> brd(src.rows, src.cols); \
BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(schar)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char2)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(char4)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(short2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(short4)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int2)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(int4)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float)
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(float2)
OPENCV_GPU_IMPLEMENT_REMAP_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
{
if (stream == 0)
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc);
else
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
}
};
template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);
static const caller_t callers[3][5] =
{
{
RemapDispatcher<PointFilter, BrdReflect101, T>::call,
RemapDispatcher<PointFilter, BrdReplicate, T>::call,
RemapDispatcher<PointFilter, BrdConstant, T>::call,
RemapDispatcher<PointFilter, BrdReflect, T>::call,
RemapDispatcher<PointFilter, BrdWrap, T>::call
},
{
RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
RemapDispatcher<LinearFilter, BrdConstant, T>::call,
RemapDispatcher<LinearFilter, BrdReflect, T>::call,
RemapDispatcher<LinearFilter, BrdWrap, T>::call
},
{
RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
RemapDispatcher<CubicFilter, BrdConstant, T>::call,
RemapDispatcher<CubicFilter, BrdReflect, T>::call,
RemapDispatcher<CubicFilter, BrdWrap, T>::call
}
};
callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
}
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
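A minimal host-side sketch of driving this code through cv::gpu::remap follows. The wrapper signature is assumed from the 2.4-era GPU module; xmap and ymap are CV_32FC1 images holding, for every destination pixel, the source coordinates to sample.

#include <opencv2/gpu/gpu.hpp>

// Mirror an image horizontally with remap: destination pixel (x, y) samples
// source coordinate (cols - 1 - x, y).
void mirror_with_remap(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst)
{
    cv::Mat xmap(src.size(), CV_32FC1), ymap(src.size(), CV_32FC1);
    for (int y = 0; y < src.rows; ++y)
        for (int x = 0; x < src.cols; ++x)
        {
            xmap.at<float>(y, x) = static_cast<float>(src.cols - 1 - x);
            ymap.at<float>(y, x) = static_cast<float>(y);
        }
    cv::gpu::GpuMat d_xmap(xmap), d_ymap(ymap);
    cv::gpu::remap(src, dst, d_xmap, d_ymap, cv::INTER_LINEAR, cv::BORDER_REPLICATE);
}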


@@ -1,302 +1,302 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
#include <cfloat>
#include <opencv2/gpu/device/scan.hpp>
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
{
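// One thread per destination pixel: scale the destination coordinate by the
// (src / dst) ratios fx, fy and sample the filtered, border-handled source.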
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float xcoo = x * fx;
const float ycoo = y * fy;
dst(y, x) = saturate_cast<T>(src(ycoo, xcoo));
}
}
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
dst(y, x) = saturate_cast<T>(src(y, x));
}
}
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
{
(void)srcWhole;
(void)xoff;
(void)yoff;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_resize_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
const int xoff; \
const int yoff; \
__host__ tex_resize_ ## type ## _reader(int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
} \
}; \
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc(xoff, yoff); \
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
else \
{ \
BrdReplicate< type > brd(src.rows, src.cols); \
BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
template <template <typename> class Filter, typename T> struct ResizeDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
if (stream == 0)
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
else
ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
}
};
template <typename T> struct ResizeDispatcher<AreaFilter, T>
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
(void)srcWhole;
(void)xoff;
(void)yoff;
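// When both scale factors are effectively whole numbers, use the specialized
// integer-area kernel; otherwise fall back to the generic area filter.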
int iscale_x = (int)round(fx);
int iscale_y = (int)round(fy);
if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
else
ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
}
};
template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
PtrStepSzb dst, int interpolation, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
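// Indexed by the OpenCV interpolation flag: 0 = INTER_NEAREST, 1 = INTER_LINEAR,
// 2 = INTER_CUBIC, 3 = INTER_AREA.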
static const caller_t callers[4] =
{
ResizeDispatcher<PointFilter, T>::call,
ResizeDispatcher<LinearFilter, T>::call,
ResizeDispatcher<CubicFilter, T>::call,
ResizeDispatcher<AreaFilter, T>::call
};
// switch to linear interpolation when INTER_AREA is requested for upscaling
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
static_cast< PtrStepSz<T> >(dst), stream);
}
template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
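// scan_traits maps an element type to the accumulator type used by scan-based code; rows of uchar are accumulated in float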
template<typename T> struct scan_traits{};
template<> struct scan_traits<uchar>
{
typedef float scan_line_type;
};
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */


@@ -1,175 +1,175 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
namespace cv { namespace gpu { namespace device
{
namespace video_encoding
{
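// fixed-point BT.601-style RGB -> YCbCr conversion: coefficients are scaled by 100 and the 128 chroma offset is folded in as 12800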
__device__ __forceinline__ void rgbtoy(const uchar b, const uchar g, const uchar r, uchar& y)
{
y = static_cast<uchar>(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100);
}
__device__ __forceinline__ void rgbtoyuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v)
{
rgbtoy(b, g, r, y);
u = static_cast<uchar>(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100);
v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
}
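// each thread converts a 2x2 block of pixels: four luma samples plus one subsampled U/V pair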
__global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst)
{
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
if (x + 1 >= src.cols || y + 1 >= src.rows)
return;
// get pointers to the data
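// YV12 is planar: a full-resolution luma plane followed by two quarter-size chroma planes, each with half the luma stride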
const size_t planeSize = src.rows * dst.step;
PtrStepb y_plane(dst.data, dst.step);
PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
uchar pix;
uchar y_val, u_val, v_val;
pix = src(y, x);
rgbtoy(pix, pix, pix, y_val);
y_plane(y, x) = y_val;
pix = src(y, x + 1);
rgbtoy(pix, pix, pix, y_val);
y_plane(y, x + 1) = y_val;
pix = src(y + 1, x);
rgbtoy(pix, pix, pix, y_val);
y_plane(y + 1, x) = y_val;
pix = src(y + 1, x + 1);
rgbtoyuv(pix, pix, pix, y_val, u_val, v_val);
y_plane(y + 1, x + 1) = y_val;
u_plane(y / 2, x / 2) = u_val;
v_plane(y / 2, x / 2) = v_val;
}
template <typename T>
__global__ void BGR_to_YV12(const PtrStepSz<T> src, PtrStepb dst)
{
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
if (x + 1 >= src.cols || y + 1 >= src.rows)
return;
// get pointers to the data
const size_t planeSize = src.rows * dst.step;
PtrStepb y_plane(dst.data, dst.step);
PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
T pix;
uchar y_val, u_val, v_val;
pix = src(y, x);
rgbtoy(pix.z, pix.y, pix.x, y_val);
y_plane(y, x) = y_val;
pix = src(y, x + 1);
rgbtoy(pix.z, pix.y, pix.x, y_val);
y_plane(y, x + 1) = y_val;
pix = src(y + 1, x);
rgbtoy(pix.z, pix.y, pix.x, y_val);
y_plane(y + 1, x) = y_val;
pix = src(y + 1, x + 1);
rgbtoyuv(pix.z, pix.y, pix.x, y_val, u_val, v_val);
y_plane(y + 1, x + 1) = y_val;
u_plane(y / 2, x / 2) = u_val;
v_plane(y / 2, x / 2) = v_val;
}
void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
{
dim3 block(32, 8);
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
Gray_to_YV12<<<grid, block>>>(src, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
{
typedef typename TypeVec<uchar, cn>::vec_type src_t;
dim3 block(32, 8);
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
BGR_to_YV12<<<grid, block>>>(static_cast< PtrStepSz<src_t> >(src), dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)
{
typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst);
static const func_t funcs[] =
{
0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4>
};
funcs[cn](src, dst);
}
}
}}}
#endif /* CUDA_DISABLER */


@@ -1,387 +1,387 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 1993-2011, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
{
namespace row_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
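// copies the filter taps into constant memory; the source pointer is expected to live on the device (device-to-device copy)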
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = 1;
#else
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 4;
const int PATCH_PER_BLOCK = 4;
const int HALO_SIZE = 1;
#endif
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
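// shared-memory row cache: PATCH_PER_BLOCK tiles of BLOCK_DIM_X pixels plus HALO_SIZE halo tiles on each side, one row per threadIdx.y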
__shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (y >= src.rows)
return;
const T* src_row = src.ptr(y);
const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
if (blockIdx.x > 0)
{
//Load left halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]);
}
else
{
//Load left halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
}
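// all but the last two block columns lie fully inside the row, so they load the main data and right halo directly; the last two go through the border handler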
if (blockIdx.x + 2 < gridDim.x)
{
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]);
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]);
}
else
{
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
}
__syncthreads();
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int x = xStart + j * BLOCK_DIM_X;
if (x < src.cols)
{
sum_t sum = VecTraits<sum_t>::all(0);
#pragma unroll
for (int k = 0; k < KSIZE; ++k)
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
template <int KSIZE, typename T, typename D, template<typename> class B>
void linearRowFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
int PATCH_PER_BLOCK;
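// taller thread blocks (BLOCK_DIM_Y = 8) on devices with compute capability >= 2.0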
if (cc >= 20)
{
BLOCK_DIM_X = 32;
BLOCK_DIM_Y = 8;
PATCH_PER_BLOCK = 4;
}
else
{
BLOCK_DIM_X = 32;
BLOCK_DIM_Y = 4;
PATCH_PER_BLOCK = 4;
}
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X * PATCH_PER_BLOCK), divUp(src.rows, BLOCK_DIM_Y));
B<T> brd(src.cols);
linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T, typename D>
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
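// dispatch table indexed by border type and kernel size (1..32); index 0 is an unused placeholder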
static const caller_t callers[5][33] =
{
{
0,
linearRowFilter_caller< 1, T, D, BrdRowReflect101>,
linearRowFilter_caller< 2, T, D, BrdRowReflect101>,
linearRowFilter_caller< 3, T, D, BrdRowReflect101>,
linearRowFilter_caller< 4, T, D, BrdRowReflect101>,
linearRowFilter_caller< 5, T, D, BrdRowReflect101>,
linearRowFilter_caller< 6, T, D, BrdRowReflect101>,
linearRowFilter_caller< 7, T, D, BrdRowReflect101>,
linearRowFilter_caller< 8, T, D, BrdRowReflect101>,
linearRowFilter_caller< 9, T, D, BrdRowReflect101>,
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
linearRowFilter_caller<16, T, D, BrdRowReflect101>,
linearRowFilter_caller<17, T, D, BrdRowReflect101>,
linearRowFilter_caller<18, T, D, BrdRowReflect101>,
linearRowFilter_caller<19, T, D, BrdRowReflect101>,
linearRowFilter_caller<20, T, D, BrdRowReflect101>,
linearRowFilter_caller<21, T, D, BrdRowReflect101>,
linearRowFilter_caller<22, T, D, BrdRowReflect101>,
linearRowFilter_caller<23, T, D, BrdRowReflect101>,
linearRowFilter_caller<24, T, D, BrdRowReflect101>,
linearRowFilter_caller<25, T, D, BrdRowReflect101>,
linearRowFilter_caller<26, T, D, BrdRowReflect101>,
linearRowFilter_caller<27, T, D, BrdRowReflect101>,
linearRowFilter_caller<28, T, D, BrdRowReflect101>,
linearRowFilter_caller<29, T, D, BrdRowReflect101>,
linearRowFilter_caller<30, T, D, BrdRowReflect101>,
linearRowFilter_caller<31, T, D, BrdRowReflect101>,
linearRowFilter_caller<32, T, D, BrdRowReflect101>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowReplicate>,
linearRowFilter_caller< 2, T, D, BrdRowReplicate>,
linearRowFilter_caller< 3, T, D, BrdRowReplicate>,
linearRowFilter_caller< 4, T, D, BrdRowReplicate>,
linearRowFilter_caller< 5, T, D, BrdRowReplicate>,
linearRowFilter_caller< 6, T, D, BrdRowReplicate>,
linearRowFilter_caller< 7, T, D, BrdRowReplicate>,
linearRowFilter_caller< 8, T, D, BrdRowReplicate>,
linearRowFilter_caller< 9, T, D, BrdRowReplicate>,
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
linearRowFilter_caller<16, T, D, BrdRowReplicate>,
linearRowFilter_caller<17, T, D, BrdRowReplicate>,
linearRowFilter_caller<18, T, D, BrdRowReplicate>,
linearRowFilter_caller<19, T, D, BrdRowReplicate>,
linearRowFilter_caller<20, T, D, BrdRowReplicate>,
linearRowFilter_caller<21, T, D, BrdRowReplicate>,
linearRowFilter_caller<22, T, D, BrdRowReplicate>,
linearRowFilter_caller<23, T, D, BrdRowReplicate>,
linearRowFilter_caller<24, T, D, BrdRowReplicate>,
linearRowFilter_caller<25, T, D, BrdRowReplicate>,
linearRowFilter_caller<26, T, D, BrdRowReplicate>,
linearRowFilter_caller<27, T, D, BrdRowReplicate>,
linearRowFilter_caller<28, T, D, BrdRowReplicate>,
linearRowFilter_caller<29, T, D, BrdRowReplicate>,
linearRowFilter_caller<30, T, D, BrdRowReplicate>,
linearRowFilter_caller<31, T, D, BrdRowReplicate>,
linearRowFilter_caller<32, T, D, BrdRowReplicate>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowConstant>,
linearRowFilter_caller< 2, T, D, BrdRowConstant>,
linearRowFilter_caller< 3, T, D, BrdRowConstant>,
linearRowFilter_caller< 4, T, D, BrdRowConstant>,
linearRowFilter_caller< 5, T, D, BrdRowConstant>,
linearRowFilter_caller< 6, T, D, BrdRowConstant>,
linearRowFilter_caller< 7, T, D, BrdRowConstant>,
linearRowFilter_caller< 8, T, D, BrdRowConstant>,
linearRowFilter_caller< 9, T, D, BrdRowConstant>,
linearRowFilter_caller<10, T, D, BrdRowConstant>,
linearRowFilter_caller<11, T, D, BrdRowConstant>,
linearRowFilter_caller<12, T, D, BrdRowConstant>,
linearRowFilter_caller<13, T, D, BrdRowConstant>,
linearRowFilter_caller<14, T, D, BrdRowConstant>,
linearRowFilter_caller<15, T, D, BrdRowConstant>,
linearRowFilter_caller<16, T, D, BrdRowConstant>,
linearRowFilter_caller<17, T, D, BrdRowConstant>,
linearRowFilter_caller<18, T, D, BrdRowConstant>,
linearRowFilter_caller<19, T, D, BrdRowConstant>,
linearRowFilter_caller<20, T, D, BrdRowConstant>,
linearRowFilter_caller<21, T, D, BrdRowConstant>,
linearRowFilter_caller<22, T, D, BrdRowConstant>,
linearRowFilter_caller<23, T, D, BrdRowConstant>,
linearRowFilter_caller<24, T, D, BrdRowConstant>,
linearRowFilter_caller<25, T, D, BrdRowConstant>,
linearRowFilter_caller<26, T, D, BrdRowConstant>,
linearRowFilter_caller<27, T, D, BrdRowConstant>,
linearRowFilter_caller<28, T, D, BrdRowConstant>,
linearRowFilter_caller<29, T, D, BrdRowConstant>,
linearRowFilter_caller<30, T, D, BrdRowConstant>,
linearRowFilter_caller<31, T, D, BrdRowConstant>,
linearRowFilter_caller<32, T, D, BrdRowConstant>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowReflect>,
linearRowFilter_caller< 2, T, D, BrdRowReflect>,
linearRowFilter_caller< 3, T, D, BrdRowReflect>,
linearRowFilter_caller< 4, T, D, BrdRowReflect>,
linearRowFilter_caller< 5, T, D, BrdRowReflect>,
linearRowFilter_caller< 6, T, D, BrdRowReflect>,
linearRowFilter_caller< 7, T, D, BrdRowReflect>,
linearRowFilter_caller< 8, T, D, BrdRowReflect>,
linearRowFilter_caller< 9, T, D, BrdRowReflect>,
linearRowFilter_caller<10, T, D, BrdRowReflect>,
linearRowFilter_caller<11, T, D, BrdRowReflect>,
linearRowFilter_caller<12, T, D, BrdRowReflect>,
linearRowFilter_caller<13, T, D, BrdRowReflect>,
linearRowFilter_caller<14, T, D, BrdRowReflect>,
linearRowFilter_caller<15, T, D, BrdRowReflect>,
linearRowFilter_caller<16, T, D, BrdRowReflect>,
linearRowFilter_caller<17, T, D, BrdRowReflect>,
linearRowFilter_caller<18, T, D, BrdRowReflect>,
linearRowFilter_caller<19, T, D, BrdRowReflect>,
linearRowFilter_caller<20, T, D, BrdRowReflect>,
linearRowFilter_caller<21, T, D, BrdRowReflect>,
linearRowFilter_caller<22, T, D, BrdRowReflect>,
linearRowFilter_caller<23, T, D, BrdRowReflect>,
linearRowFilter_caller<24, T, D, BrdRowReflect>,
linearRowFilter_caller<25, T, D, BrdRowReflect>,
linearRowFilter_caller<26, T, D, BrdRowReflect>,
linearRowFilter_caller<27, T, D, BrdRowReflect>,
linearRowFilter_caller<28, T, D, BrdRowReflect>,
linearRowFilter_caller<29, T, D, BrdRowReflect>,
linearRowFilter_caller<30, T, D, BrdRowReflect>,
linearRowFilter_caller<31, T, D, BrdRowReflect>,
linearRowFilter_caller<32, T, D, BrdRowReflect>
},
{
0,
linearRowFilter_caller< 1, T, D, BrdRowWrap>,
linearRowFilter_caller< 2, T, D, BrdRowWrap>,
linearRowFilter_caller< 3, T, D, BrdRowWrap>,
linearRowFilter_caller< 4, T, D, BrdRowWrap>,
linearRowFilter_caller< 5, T, D, BrdRowWrap>,
linearRowFilter_caller< 6, T, D, BrdRowWrap>,
linearRowFilter_caller< 7, T, D, BrdRowWrap>,
linearRowFilter_caller< 8, T, D, BrdRowWrap>,
linearRowFilter_caller< 9, T, D, BrdRowWrap>,
linearRowFilter_caller<10, T, D, BrdRowWrap>,
linearRowFilter_caller<11, T, D, BrdRowWrap>,
linearRowFilter_caller<12, T, D, BrdRowWrap>,
linearRowFilter_caller<13, T, D, BrdRowWrap>,
linearRowFilter_caller<14, T, D, BrdRowWrap>,
linearRowFilter_caller<15, T, D, BrdRowWrap>,
linearRowFilter_caller<16, T, D, BrdRowWrap>,
linearRowFilter_caller<17, T, D, BrdRowWrap>,
linearRowFilter_caller<18, T, D, BrdRowWrap>,
linearRowFilter_caller<19, T, D, BrdRowWrap>,
linearRowFilter_caller<20, T, D, BrdRowWrap>,
linearRowFilter_caller<21, T, D, BrdRowWrap>,
linearRowFilter_caller<22, T, D, BrdRowWrap>,
linearRowFilter_caller<23, T, D, BrdRowWrap>,
linearRowFilter_caller<24, T, D, BrdRowWrap>,
linearRowFilter_caller<25, T, D, BrdRowWrap>,
linearRowFilter_caller<26, T, D, BrdRowWrap>,
linearRowFilter_caller<27, T, D, BrdRowWrap>,
linearRowFilter_caller<28, T, D, BrdRowWrap>,
linearRowFilter_caller<29, T, D, BrdRowWrap>,
linearRowFilter_caller<30, T, D, BrdRowWrap>,
linearRowFilter_caller<31, T, D, BrdRowWrap>,
linearRowFilter_caller<32, T, D, BrdRowWrap>
}
};
loadKernel(kernel, ksize, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace row_filter
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */



@@ -1,95 +1,95 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
#define __OPENCV_CUDA_SAFE_CALL_HPP__
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <cublas.h>
#include "NCV.hpp"
#if defined(__GNUC__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif
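// The *SafeCall macros forward the returned status code plus __FILE__/__LINE__ (and
// __func__ where the compiler provides it) to the matching reporting helper below,
// which is expected to raise an OpenCV error when the call did not succeed.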
namespace cv { namespace gpu
{
void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
void cufftError(int err, const char *file, const int line, const char *func = "");
void cublasError(int err, const char *file, const int line, const char *func = "");
}}
static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
cv::gpu::nppError(err, file, line, func);
}
static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (NCV_SUCCESS != err)
cv::gpu::ncvError(err, file, line, func);
}
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
if (CUFFT_SUCCESS != err)
cv::gpu::cufftError(err, file, line, func);
}
static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")
{
if (CUBLAS_STATUS_SUCCESS != err)
cv::gpu::cublasError(err, file, line, func);
}
#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
#define __OPENCV_CUDA_SAFE_CALL_HPP__
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <cublas.h>
#include "NCV.hpp"
#if defined(__GNUC__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif
namespace cv { namespace gpu
{
void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
void cufftError(int err, const char *file, const int line, const char *func = "");
void cublasError(int err, const char *file, const int line, const char *func = "");
}}
static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
cv::gpu::nppError(err, file, line, func);
}
static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (NCV_SUCCESS != err)
cv::gpu::ncvError(err, file, line, func);
}
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
if (CUFFT_SUCCESS != err)
cv::gpu::cufftError(err, file, line, func);
}
static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")
{
if (CUBLAS_STATUS_SUCCESS != err)
cv::gpu::cublasError(err, file, line, func);
}
#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -56,14 +56,14 @@ namespace cv
template<class T, enum cudaTextureReadMode readMode>
TextureBinder(const PtrStepSz<T>& arr, const struct texture<T, 2, readMode>& tex) : texref(&tex)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step) );
}
template<class T, enum cudaTextureReadMode readMode>
TextureBinder(const PtrSz<T>& arr, const struct texture<T, 1, readMode> &tex) : texref(&tex)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture(0, tex, arr.data, desc, arr.size * arr.elemSize()) );
}


@@ -1,389 +1,389 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
__constant__ float c_warpMat[3 * 3];
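// c_warpMat holds the warp coefficients in constant memory, row-major. Affine warps write
// only the first six entries (a 2x3 matrix); perspective warps write a full 3x3 homography.
// AffineTransform maps (x, y) to (m00*x + m01*y + m02, m10*x + m11*y + m12), while
// PerspectiveTransform additionally divides by the homogeneous term m20*x + m21*y + m22.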
struct AffineTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
{
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
return make_float2(xcoo, ycoo);
}
};
struct PerspectiveTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
{
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
return make_float2(xcoo, ycoo);
}
};
///////////////////////////////////////////////////////////////////
// Build Maps
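// One thread per map element: each thread applies the warp matrix to its (x, y) and stores
// the resulting source coordinates in xmap/ymap, which are presumably consumed later by the
// generic remap path.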
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < xmap.cols && y < xmap.rows)
{
const float2 coord = Transform::calcCoord(x, y);
xmap(y, x) = coord.x;
ymap(y, x) = coord.y;
}
}
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
}
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
}
///////////////////////////////////////////////////////////////////
// Warp
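// Gather-style warp: each destination pixel computes its source coordinate through the
// Transform policy and reads it via 'src', a filter/border-reader stack, so interpolation
// and out-of-range handling happen inside that read before the final saturate_cast.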
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float2 coord = Transform::calcCoord(x, y);
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
}
}
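// Two dispatch paths follow: WarpDispatcherStream launches asynchronously on the caller's
// stream and never synchronizes, while WarpDispatcherNonStream (and the texture-based
// specializations generated by the macro below) runs on the default stream and ends with a
// device synchronize so the result is ready on return.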
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, int)
{
(void)xoff;
(void)yoff;
(void)srcWhole;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block>>>(filter_src, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
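// For each element type listed after it, OPENCV_GPU_IMPLEMENT_WARP_TEX generates a 2D
// texture reference plus a small reader object and specializes WarpDispatcherNonStream to
// fetch the source through the texture cache. The BrdReplicate specialization adds a fast
// path: when the ROI covers the whole image, the texture's clamp addressing already
// replicates the border, so no explicit border reader is needed.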
#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_warp_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff, yoff; \
tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
} \
}; \
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, int cc) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(32, cc >= 20 ? 8 : 4); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_warp_ ## type , srcWhole); \
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, int) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_warp_ ## type , srcWhole); \
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
else \
{ \
BrdReplicate<type> brd(src.rows, src.cols); \
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
{
if (stream == 0)
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc);
else
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc);
}
};
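// funcs[interpolation][borderMode]: rows correspond to point/linear/cubic filtering and
// columns to reflect101/replicate/constant/reflect/wrap borders, presumably matching how
// the host wrappers remap the cv::INTER_* and cv::BORDER_* constants before calling in.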
template <class Transform, typename T>
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);
static const func_t funcs[3][5] =
{
{
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
},
{
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
},
{
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
}
};
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
}
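// warpAffine_gpu / warpPerspective_gpu upload the 2x3 or 3x3 coefficient block into
// c_warpMat and forward to warp_caller with the matching Transform policy; the explicit
// instantiations below define which element types are visible to the host wrappers.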
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
}
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
}
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
__constant__ float c_warpMat[3 * 3];
struct AffineTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
{
const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
return make_float2(xcoo, ycoo);
}
};
struct PerspectiveTransform
{
static __device__ __forceinline__ float2 calcCoord(int x, int y)
{
const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
return make_float2(xcoo, ycoo);
}
};
///////////////////////////////////////////////////////////////////
// Build Maps
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < xmap.cols && y < xmap.rows)
{
const float2 coord = Transform::calcCoord(x, y);
xmap(y, x) = coord.x;
ymap(y, x) = coord.y;
}
}
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
}
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
}
///////////////////////////////////////////////////////////////////
// Warp
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const float2 coord = Transform::calcCoord(x, y);
dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
}
}
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, int)
{
(void)xoff;
(void)yoff;
(void)srcWhole;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
warp<Transform><<<grid, block>>>(filter_src, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
#define OPENCV_GPU_IMPLEMENT_WARP_TEX(type) \
texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_warp_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff, yoff; \
tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
} \
}; \
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, int cc) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(32, cc >= 20 ? 8 : 4); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_warp_ ## type , srcWhole); \
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, int) \
{ \
dim3 block(32, 8); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_warp_ ## type , srcWhole); \
tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
{ \
Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
else \
{ \
BrdReplicate<type> brd(src.rows, src.cols); \
BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(schar)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char2)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(char4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(short)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(short2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(short4)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int2)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(int4)
OPENCV_GPU_IMPLEMENT_WARP_TEX(float)
//OPENCV_GPU_IMPLEMENT_WARP_TEX(float2)
OPENCV_GPU_IMPLEMENT_WARP_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_WARP_TEX
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
{
if (stream == 0)
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc);
else
WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc);
}
};
template <class Transform, typename T>
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);
static const func_t funcs[3][5] =
{
{
WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call
},
{
WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call
},
{
WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call
}
};
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
}
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
}
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
{
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
}
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
#endif /* CUDA_DISABLER */


@@ -1,253 +1,253 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if defined HAVE_CUDA
struct Stream::Impl
{
static cudaStream_t getStream(const Impl* impl) { return impl ? impl->stream : 0; }
cudaStream_t stream;
int ref_counter;
};
#include "opencv2/gpu/stream_accessor.hpp"
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
return Stream::Impl::getStream(stream.impl);
}
#endif /* defined HAVE_CUDA */
#if !defined (HAVE_CUDA)
void cv::gpu::Stream::create() { throw_nogpu(); }
void cv::gpu::Stream::release() { throw_nogpu(); }
cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); }
cv::gpu::Stream::~Stream() { throw_nogpu(); }
cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); }
Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, CudaMem& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);
void setTo(GpuMat& src, Scalar s, cudaStream_t stream);
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
}}
namespace
{
template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
{
dst.create(src.size(), src.type());
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) );
}
}
void cv::gpu::Stream::create()
{
if (impl)
release();
cudaStream_t stream;
cudaSafeCall( cudaStreamCreate( &stream ) );
impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl));
impl->stream = stream;
impl->ref_counter = 1;
}
void cv::gpu::Stream::release()
{
if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )
{
cudaSafeCall( cudaStreamDestroy( impl->stream ) );
cv::fastFree( impl );
}
}
cv::gpu::Stream::Stream() : impl(0) { create(); }
cv::gpu::Stream::~Stream() { release(); }
cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
{
if( impl )
CV_XADD(&impl->ref_counter, 1);
}
Stream& cv::gpu::Stream::operator=(const Stream& stream)
{
if( this != &stream )
{
if( stream.impl )
CV_XADD(&stream.impl->ref_counter, 1);
release();
impl = stream.impl;
}
return *this;
}
bool cv::gpu::Stream::queryIfComplete()
{
cudaError_t err = cudaStreamQuery( Impl::getStream(impl) );
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
}
void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) ); }
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// dst must be preallocated with matching size/type (ideally backed by page-locked memory); if not, allocation would be done here, but after that dst would not point to page-locked memory
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); }
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); }
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice); }
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
CV_Assert((src.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
return;
}
if (src.depth() == CV_8U)
{
int cn = src.channels();
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
{
int val = saturate_cast<uchar>(s[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
return;
}
}
setTo(src, s, Impl::getStream(impl));
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
CV_Assert((src.depth() != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
CV_Assert(mask.type() == CV_8UC1);
setTo(src, val, mask, Impl::getStream(impl));
}
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
{
CV_Assert((src.depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
if( rtype < 0 )
rtype = src.type();
else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());
int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
if( sdepth == ddepth && noScale )
{
src.copyTo(dst);
return;
}
GpuMat temp;
const GpuMat* psrc = &src;
if( sdepth != ddepth && psrc == &dst )
psrc = &(temp = src);
dst.create( src.size(), rtype );
convertTo(*psrc, dst, alpha, beta, Impl::getStream(impl)); // use the temporary copy when converting in place
}
cv::gpu::Stream::operator bool() const
{
return impl && impl->stream;
}
cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) {}
cv::gpu::Stream& cv::gpu::Stream::Null()
{
static Stream s((Impl*)0);
return s;
}
#endif /* !defined (HAVE_CUDA) */
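With the reference-counted Impl above, Stream behaves like a shared handle: copies point at the same cudaStream_t, and the enqueue* methods queue work that runs asynchronously with respect to the host until waitForCompletion (or queryIfComplete) is used. A minimal usage sketch built from the methods defined above; the CudaMem constructor with its default page-locked allocation type is assumed from the 2.4 headers.
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::gpu::CudaMem h_src(480, 640, CV_8UC1);       // page-locked host buffer (default alloc type)
        cv::gpu::CudaMem h_dst(480, 640, CV_8UC1);
        cv::gpu::GpuMat d_img;

        cv::gpu::Stream stream;                          // the constructor calls create()
        stream.enqueueUpload(h_src, d_img);              // async host-to-device copy on this stream
        stream.enqueueMemSet(d_img, cv::Scalar::all(0)); // async memset; non-zero values fall back to setTo
        stream.enqueueDownload(d_img, h_dst);            // async device-to-host copy into page-locked memory
        stream.waitForCompletion();                      // block until all queued work is done
        return 0;
    }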


@@ -1,63 +1,63 @@
#include "cuvid_video_source.h"
#include "cu_safe_call.h"
#if defined(HAVE_CUDA) && !defined(__APPLE__)
cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const std::string& fname)
{
CUVIDSOURCEPARAMS params;
std::memset(&params, 0, sizeof(CUVIDSOURCEPARAMS));
// Fill parameter struct
params.pUserData = this; // will be passed to data handlers
params.pfnVideoDataHandler = HandleVideoData; // our local video-handler callback
params.pfnAudioDataHandler = 0;
// now create the actual source
CUresult res = cuvidCreateVideoSource(&videoSource_, fname.c_str(), &params);
if (res == CUDA_ERROR_INVALID_SOURCE)
throw std::runtime_error("Unsupported video source");
cuSafeCall( res );
CUVIDEOFORMAT vidfmt;
cuSafeCall( cuvidGetSourceVideoFormat(videoSource_, &vidfmt, 0) );
format_.codec = static_cast<VideoReader_GPU::Codec>(vidfmt.codec);
format_.chromaFormat = static_cast<VideoReader_GPU::ChromaFormat>(vidfmt.chroma_format);
format_.width = vidfmt.coded_width;
format_.height = vidfmt.coded_height;
}
cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const
{
return format_;
}
void cv::gpu::detail::CuvidVideoSource::start()
{
cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Started) );
}
void cv::gpu::detail::CuvidVideoSource::stop()
{
cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Stopped) );
}
bool cv::gpu::detail::CuvidVideoSource::isStarted() const
{
return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Started);
}
bool cv::gpu::detail::CuvidVideoSource::hasError() const
{
return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Error);
}
int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, CUVIDSOURCEDATAPACKET* packet)
{
CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);
return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
}
#endif // defined(HAVE_CUDA) && !defined(__APPLE__)
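HandleVideoData is the standard way to bridge a C callback API to a C++ object: the this pointer rides along in pUserData and is cast back inside a static member function, which then forwards to parseVideoData. A self-contained sketch of that pattern (names and types here are illustrative, not the CUVID API):
    #include <cstdio>

    // Illustrative stand-in for a C-style callback slot (not part of the CUVID API).
    typedef int (*VideoDataHandler)(void* userData, const unsigned char* data, int size);

    class Source
    {
    public:
        // Registered as the C callback; userData smuggles the Source* back in.
        static int HandleData(void* userData, const unsigned char* data, int size)
        {
            Source* thiz = static_cast<Source*>(userData);
            return thiz->parse(data, size);
        }

    private:
        int parse(const unsigned char* /*data*/, int size)
        {
            std::printf("parsed %d bytes\n", size);
            return 1; // non-zero = keep streaming
        }
    };

    int main()
    {
        Source src;
        VideoDataHandler handler = &Source::HandleData; // what pfnVideoDataHandler amounts to
        unsigned char payload[4] = { 0, 1, 2, 3 };
        handler(&src, payload, 4);                      // the driver would invoke this from its own thread
        return 0;
    }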
#include "cuvid_video_source.h"
#include "cu_safe_call.h"
#if defined(HAVE_CUDA) && !defined(__APPLE__)
cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const std::string& fname)
{
CUVIDSOURCEPARAMS params;
std::memset(&params, 0, sizeof(CUVIDSOURCEPARAMS));
// Fill parameter struct
params.pUserData = this; // will be passed to data handlers
params.pfnVideoDataHandler = HandleVideoData; // our local video-handler callback
params.pfnAudioDataHandler = 0;
// now create the actual source
CUresult res = cuvidCreateVideoSource(&videoSource_, fname.c_str(), &params);
if (res == CUDA_ERROR_INVALID_SOURCE)
throw std::runtime_error("Unsupported video source");
cuSafeCall( res );
CUVIDEOFORMAT vidfmt;
cuSafeCall( cuvidGetSourceVideoFormat(videoSource_, &vidfmt, 0) );
format_.codec = static_cast<VideoReader_GPU::Codec>(vidfmt.codec);
format_.chromaFormat = static_cast<VideoReader_GPU::ChromaFormat>(vidfmt.chroma_format);
format_.width = vidfmt.coded_width;
format_.height = vidfmt.coded_height;
}
cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const
{
return format_;
}
void cv::gpu::detail::CuvidVideoSource::start()
{
cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Started) );
}
void cv::gpu::detail::CuvidVideoSource::stop()
{
cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Stopped) );
}
bool cv::gpu::detail::CuvidVideoSource::isStarted() const
{
return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Started);
}
bool cv::gpu::detail::CuvidVideoSource::hasError() const
{
return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Error);
}
int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, CUVIDSOURCEDATAPACKET* packet)
{
CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);
return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
}
#endif // defined(HAVE_CUDA) && !defined(__APPLE__)


@@ -1,90 +1,90 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __CUVID_VIDEO_SOURCE_H__
#define __CUVID_VIDEO_SOURCE_H__
#include "precomp.hpp"
#if defined(HAVE_CUDA) && !defined(__APPLE__)
namespace cv { namespace gpu
{
namespace detail
{
class CuvidVideoSource : public VideoReader_GPU::VideoSource
{
public:
explicit CuvidVideoSource(const std::string& fname);
~CuvidVideoSource() { cuvidDestroyVideoSource(videoSource_); }
VideoReader_GPU::FormatInfo format() const;
void start();
void stop();
bool isStarted() const;
bool hasError() const;
private:
CuvidVideoSource(const CuvidVideoSource&);
CuvidVideoSource& operator =(const CuvidVideoSource&);
// Callback for handling packages of demuxed video data.
//
// Parameters:
// pUserData - Pointer to user data. We must pass a pointer to a
// VideoSourceData struct here that contains a valid CUvideoparser
// and FrameQueue.
// pPacket - video-source data packet.
//
// NOTE: called from a different thread that does not have a CUDA context
//
static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket);
CUvideosource videoSource_;
VideoReader_GPU::FormatInfo format_;
};
}
}}
#endif // defined(HAVE_CUDA) && !defined(__APPLE__)
#endif // __CUVID_VIDEO_SOURCE_H__


@@ -124,7 +124,7 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
dst.create(src.size(), src.type());
func(src, dst, search_window/2, block_window/2, h, gpuBorderType, StreamAccessor::getStream(s));
}
@@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device
template<typename T>
void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer,
int search_window, int block_window, float h, cudaStream_t stream);
void fnlm_split_channels(const PtrStepSz<uchar3>& lab, PtrStepb l, PtrStep<uchar2> ab, cudaStream_t stream);
void fnlm_merge_channels(const PtrStepb& l, const PtrStep<uchar2>& ab, PtrStepSz<uchar3> lab, cudaStream_t stream);
}
@@ -152,30 +152,30 @@ namespace cv { namespace gpu { namespace device
void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
{
CV_Assert(src.depth() == CV_8U && src.channels() < 4);
int border_size = search_window/2 + block_window/2;
Size esize = src.size() + Size(border_size, border_size) * 2;
cv::gpu::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);
cv::gpu::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));
int bcols, brows;
device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
buffer.create(brows, bcols, CV_32S);
using namespace cv::gpu::device::imgproc;
typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};
dst.create(src.size(), src.type());
funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
}
void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
{
#if (CUDA_VERSION < 5000)
(void)src;
(void)dst;
@@ -183,14 +183,14 @@ void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat&
(void)h_color;
(void)search_window;
(void)block_window;
(void)s;
CV_Error( CV_GpuApiCallError, "Lab method requires CUDA 5.0 or higher" );
#else
CV_Assert(src.type() == CV_8UC3);
lab.create(src.size(), src.type());
cv::gpu::cvtColor(src, lab, CV_BGR2Lab, 0, s);
@@ -201,7 +201,7 @@ void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat&
l.create(src.size(), CV_8U);
ab.create(src.size(), CV_8UC2);
device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));
simpleMethod(l, l, h_luminance, search_window, block_window, s);
simpleMethod(ab, ab, h_color, search_window, block_window, s);
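These hunks only touch whitespace, but they trace the whole fast-NLM path: pad the source with copyMakeBorder, size the integral buffer, and dispatch on channel count; labMethod additionally converts to Lab, splits L from ab, denoises them with separate strengths, and merges back. A short usage sketch built from the two method signatures shown above (the window sizes and filter strengths are just illustrative values):
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::gpu::GpuMat d_gray(480, 640, CV_8UC1, cv::Scalar::all(128));
        cv::gpu::GpuMat d_bgr (480, 640, CV_8UC3, cv::Scalar::all(128));
        cv::gpu::GpuMat d_out1, d_out2;

        cv::gpu::FastNonLocalMeansDenoising fnlm;
        // 8-bit input with up to 3 channels, per the asserts above.
        fnlm.simpleMethod(d_gray, d_out1, 10.0f, 21, 7, cv::gpu::Stream::Null());
        // CV_8UC3 only; separate luminance/chroma strengths (needs CUDA >= 5.0 per the guard above).
        fnlm.labMethod(d_bgr, d_out2, 10.0f, 10.0f, 21, 7, cv::gpu::Stream::Null());
        return 0;
    }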

File diff suppressed because it is too large


@@ -1,249 +1,249 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#ifdef HAVE_CUDA
namespace
{
#define error_entry(entry) { entry, #entry }
struct ErrorEntry
{
int code;
string str;
};
struct ErrorEntryComparer
{
int code;
ErrorEntryComparer(int code_) : code(code_) {};
bool operator()(const ErrorEntry& e) const { return e.code == code; }
};
string getErrorString(int code, const ErrorEntry* errors, size_t n)
{
size_t idx = find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
const string& msg = (idx != n) ? errors[idx].str : string("Unknown error code");
ostringstream ostr;
ostr << msg << " [Code = " << code << "]";
return ostr.str();
}
//////////////////////////////////////////////////////////////////////////
// NPP errors
const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ),
error_entry( NPP_ODD_ROI_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
//////////////////////////////////////////////////////////////////////////
// NCV errors
const ErrorEntry ncv_errors [] =
{
error_entry( NCV_SUCCESS ),
error_entry( NCV_UNKNOWN_ERROR ),
error_entry( NCV_CUDA_ERROR ),
error_entry( NCV_NPP_ERROR ),
error_entry( NCV_FILE_ERROR ),
error_entry( NCV_NULL_PTR ),
error_entry( NCV_INCONSISTENT_INPUT ),
error_entry( NCV_TEXTURE_BIND_ERROR ),
error_entry( NCV_DIMENSIONS_INVALID ),
error_entry( NCV_INVALID_ROI ),
error_entry( NCV_INVALID_STEP ),
error_entry( NCV_INVALID_SCALE ),
error_entry( NCV_INVALID_SCALE ),
error_entry( NCV_ALLOCATOR_NOT_INITIALIZED ),
error_entry( NCV_ALLOCATOR_BAD_ALLOC ),
error_entry( NCV_ALLOCATOR_BAD_DEALLOC ),
error_entry( NCV_ALLOCATOR_INSUFFICIENT_CAPACITY ),
error_entry( NCV_ALLOCATOR_DEALLOC_ORDER ),
error_entry( NCV_ALLOCATOR_BAD_REUSE ),
error_entry( NCV_MEM_COPY_ERROR ),
error_entry( NCV_MEM_RESIDENCE_ERROR ),
error_entry( NCV_MEM_INSUFFICIENT_CAPACITY ),
error_entry( NCV_HAAR_INVALID_PIXEL_STEP ),
error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER ),
error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE ),
error_entry( NCV_HAAR_TOO_LARGE_FEATURES ),
error_entry( NCV_HAAR_XML_LOADING_EXCEPTION ),
error_entry( NCV_NOIMPL_HAAR_TILTED_FEATURES ),
error_entry( NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW ),
error_entry( NPPST_SUCCESS ),
error_entry( NPPST_ERROR ),
error_entry( NPPST_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPPST_NULL_POINTER_ERROR ),
error_entry( NPPST_TEXTURE_BIND_ERROR ),
error_entry( NPPST_MEMCPY_ERROR ),
error_entry( NPPST_MEM_ALLOC_ERR ),
error_entry( NPPST_MEMFREE_ERR ),
error_entry( NPPST_INVALID_ROI ),
error_entry( NPPST_INVALID_STEP ),
error_entry( NPPST_INVALID_SCALE ),
error_entry( NPPST_MEM_INSUFFICIENT_BUFFER ),
error_entry( NPPST_MEM_RESIDENCE_ERROR ),
error_entry( NPPST_MEM_INTERNAL_ERROR )
};
const size_t ncv_error_num = sizeof(ncv_errors) / sizeof(ncv_errors[0]);
//////////////////////////////////////////////////////////////////////////
// CUFFT errors
const ErrorEntry cufft_errors[] =
{
error_entry( CUFFT_INVALID_PLAN ),
error_entry( CUFFT_ALLOC_FAILED ),
error_entry( CUFFT_INVALID_TYPE ),
error_entry( CUFFT_INVALID_VALUE ),
error_entry( CUFFT_INTERNAL_ERROR ),
error_entry( CUFFT_EXEC_FAILED ),
error_entry( CUFFT_SETUP_FAILED ),
error_entry( CUFFT_INVALID_SIZE ),
error_entry( CUFFT_UNALIGNED_DATA )
};
const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);
//////////////////////////////////////////////////////////////////////////
// CUBLAS errors
const ErrorEntry cublas_errors[] =
{
error_entry( CUBLAS_STATUS_SUCCESS ),
error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
error_entry( CUBLAS_STATUS_INVALID_VALUE ),
error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
};
const int cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
}
namespace cv
{
namespace gpu
{
void nppError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, npp_errors, npp_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
void ncvError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, ncv_errors, ncv_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
void cufftError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, cufft_errors, cufft_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
void cublasError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, cublas_errors, cublas_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
}
}
#endif
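Each table row is produced by the error_entry macro, which uses the preprocessor stringizing operator so the row carries both the numeric code and its own identifier as text; getErrorString then does a linear search and falls back to "Unknown error code". A small stand-alone demo of the same trick (the DEMO_* codes are made up for illustration):
    #include <iostream>
    #include <string>

    // Same trick as error_entry above: #entry stringizes the identifier.
    #define error_entry(entry) { entry, #entry }

    struct ErrorEntry { int code; std::string str; };

    enum { DEMO_BAD_ARG = -2, DEMO_OK = 0 };  // illustrative codes, not NPP's

    int main()
    {
        const ErrorEntry demo[] = { error_entry(DEMO_BAD_ARG), error_entry(DEMO_OK) };
        // error_entry(DEMO_BAD_ARG) expanded to { DEMO_BAD_ARG, "DEMO_BAD_ARG" }
        std::cout << demo[0].str << " [Code = " << demo[0].code << "]" << std::endl;
        return 0;
    }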


@@ -1,177 +1,177 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::gpu::FAST_GPU::FAST_GPU(int, bool, double) { throw_nogpu(); }
void cv::gpu::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::FAST_GPU::release() { throw_nogpu(); }
int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_nogpu(); return 0; }
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat&) { throw_nogpu(); return 0; }
#else /* !defined (HAVE_CUDA) */
cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
{
}
void cv::gpu::FAST_GPU::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
if (image.empty())
return;
(*this)(image, mask, d_keypoints_);
downloadKeypoints(d_keypoints_, keypoints);
}
void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
if (d_keypoints.empty())
return;
Mat h_keypoints(d_keypoints);
convertKeypoints(h_keypoints, keypoints);
}
void cv::gpu::FAST_GPU::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
{
if (h_keypoints.empty())
return;
CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
int npoints = h_keypoints.cols;
keypoints.resize(npoints);
const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
for (int i = 0; i < npoints; ++i)
{
KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
keypoints[i] = kp;
}
}
void cv::gpu::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{
calcKeyPointsLocation(img, mask);
keypoints.cols = getKeyPoints(keypoints);
}
namespace cv { namespace gpu { namespace device
{
namespace fast
{
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
}
}}}
int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
{
using namespace cv::gpu::device::fast;
CV_Assert(img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);
if (nonmaxSupression)
{
ensureSizeIsEnough(img.size(), CV_32SC1, score_);
score_.setTo(Scalar::all(0));
}
count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSupression ? score_ : PtrStepSzi(), threshold);
count_ = std::min(count_, maxKeypoints);
return count_;
}
int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
using namespace cv::gpu::device::fast;
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
if (count_ == 0)
return 0;
ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
if (nonmaxSupression)
return nonmaxSupression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));
GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
kpLoc_.colRange(0, count_).copyTo(locRow);
keypoints.row(1).setTo(Scalar::all(0));
return count_;
}
void cv::gpu::FAST_GPU::release()
{
kpLoc_.release();
score_.release();
d_keypoints_.release();
}
#endif /* !defined (HAVE_CUDA) */
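calcKeyPointsLocation and getKeyPoints are the two halves of the detector: the first writes candidate locations (plus a score image when non-max suppression is on), the second compacts them into the two-row keypoint matrix, and the operator() overloads chain both and optionally download to std::vector<KeyPoint>. A minimal usage sketch built only from the members shown above:
    #include <vector>
    #include <opencv2/features2d/features2d.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::gpu::GpuMat d_img(480, 640, CV_8UC1, cv::Scalar::all(0));  // CV_8UC1 input is required
        std::vector<cv::KeyPoint> keypoints;

        // Requires a device built with global atomics (see calcKeyPointsLocation above).
        cv::gpu::FAST_GPU fast(20 /*threshold*/, true /*nonmaxSupression*/, 0.05 /*keypointsRatio*/);
        fast(d_img, cv::gpu::GpuMat() /*no mask*/, keypoints);  // detect and download in one call
        fast.release();                                          // free the internal buffers
        return 0;
    }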

View File

@@ -1,182 +1,182 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "ffmpeg_video_source.h"
#if defined(HAVE_CUDA) && !defined(__APPLE__)
#ifdef HAVE_FFMPEG
#include "cap_ffmpeg_impl.hpp"
#else
#include "cap_ffmpeg_api.hpp"
#endif
namespace
{
Create_InputMediaStream_FFMPEG_Plugin create_InputMediaStream_FFMPEG_p = 0;
Release_InputMediaStream_FFMPEG_Plugin release_InputMediaStream_FFMPEG_p = 0;
Read_InputMediaStream_FFMPEG_Plugin read_InputMediaStream_FFMPEG_p = 0;
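// init_MediaStream_FFMPEG(): one-time, lazy binding of the three demuxer entry points above.
// On Windows the versioned opencv_ffmpeg DLL is loaded at runtime and the symbols are
// resolved with GetProcAddress; otherwise, when HAVE_FFMPEG is defined, the statically
// linked functions are bound directly. Returns true only if all three pointers are valid.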
bool init_MediaStream_FFMPEG()
{
static bool initialized = false;
if (!initialized)
{
#if defined WIN32 || defined _WIN32
const char* module_name = "opencv_ffmpeg"
CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION)
#if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
"_64"
#endif
".dll";
static HMODULE cvFFOpenCV = LoadLibrary(module_name);
if (cvFFOpenCV)
{
create_InputMediaStream_FFMPEG_p =
(Create_InputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "create_InputMediaStream_FFMPEG");
release_InputMediaStream_FFMPEG_p =
(Release_InputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "release_InputMediaStream_FFMPEG");
read_InputMediaStream_FFMPEG_p =
(Read_InputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "read_InputMediaStream_FFMPEG");
initialized = create_InputMediaStream_FFMPEG_p != 0 && release_InputMediaStream_FFMPEG_p != 0 && read_InputMediaStream_FFMPEG_p != 0;
}
#elif defined HAVE_FFMPEG
create_InputMediaStream_FFMPEG_p = create_InputMediaStream_FFMPEG;
release_InputMediaStream_FFMPEG_p = release_InputMediaStream_FFMPEG;
read_InputMediaStream_FFMPEG_p = read_InputMediaStream_FFMPEG;
initialized = true;
#endif
}
return initialized;
}
}
cv::gpu::detail::FFmpegVideoSource::FFmpegVideoSource(const std::string& fname) :
stream_(0)
{
CV_Assert( init_MediaStream_FFMPEG() );
int codec;
int chroma_format;
int width;
int height;
stream_ = create_InputMediaStream_FFMPEG_p(fname.c_str(), &codec, &chroma_format, &width, &height);
if (!stream_)
CV_Error(CV_StsUnsupportedFormat, "Unsupported video source");
format_.codec = static_cast<VideoReader_GPU::Codec>(codec);
format_.chromaFormat = static_cast<VideoReader_GPU::ChromaFormat>(chroma_format);
format_.width = width;
format_.height = height;
}
cv::gpu::detail::FFmpegVideoSource::~FFmpegVideoSource()
{
release_InputMediaStream_FFMPEG_p(stream_);
}
cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::FFmpegVideoSource::format() const
{
return format_;
}
void cv::gpu::detail::FFmpegVideoSource::start()
{
stop_ = false;
hasError_ = false;
thread_.reset(new Thread(readLoop, this));
}
void cv::gpu::detail::FFmpegVideoSource::stop()
{
stop_ = true;
thread_->wait();
thread_.reset();
}
bool cv::gpu::detail::FFmpegVideoSource::isStarted() const
{
return !stop_;
}
bool cv::gpu::detail::FFmpegVideoSource::hasError() const
{
return hasError_;
}
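// readLoop(): body of the background thread created in start(). It repeatedly reads an
// encoded packet from the FFMPEG stream and hands it to the parser via parseVideoData()
// until end-of-file, a read/parse error, or an external stop request; the final
// parseVideoData(0, 0, true) call signals end-of-stream to the parser.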
void cv::gpu::detail::FFmpegVideoSource::readLoop(void* userData)
{
FFmpegVideoSource* thiz = static_cast<FFmpegVideoSource*>(userData);
for (;;)
{
unsigned char* data;
int size;
int endOfFile;
if (!read_InputMediaStream_FFMPEG_p(thiz->stream_, &data, &size, &endOfFile))
{
thiz->hasError_ = !endOfFile;
break;
}
if (!thiz->parseVideoData(data, size))
{
thiz->hasError_ = true;
break;
}
if (thiz->stop_)
break;
}
thiz->parseVideoData(0, 0, true);
}
#endif // HAVE_CUDA
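A hedged usage sketch of the higher-level GPU video reader that this source ultimately feeds. It is not part of this file; the file name and the empty processing loop are placeholders, and the sketch assumes the OpenCV 2.4 gpu module built with CUDA and NVCUVID available.
// Hedged sketch: decode a video file on the GPU frame by frame.
#include <opencv2/gpu/gpu.hpp>
#include <string>
void decodeOnGpu(const std::string& fname)
{
    cv::gpu::VideoReader_GPU reader(fname);   // internally wraps a raw source such as FFmpegVideoSource
    cv::gpu::GpuMat frame;
    while (reader.read(frame))
    {
        // process 'frame' on the device here
    }
    reader.close();
}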

View File

@@ -1,88 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __FFMPEG_VIDEO_SOURCE_H__
#define __FFMPEG_VIDEO_SOURCE_H__
#include "precomp.hpp"
#include "thread_wrappers.h"
#if defined(HAVE_CUDA) && !defined(__APPLE__)
struct InputMediaStream_FFMPEG;
namespace cv { namespace gpu
{
namespace detail
{
class FFmpegVideoSource : public VideoReader_GPU::VideoSource
{
public:
FFmpegVideoSource(const std::string& fname);
~FFmpegVideoSource();
VideoReader_GPU::FormatInfo format() const;
void start();
void stop();
bool isStarted() const;
bool hasError() const;
private:
FFmpegVideoSource(const FFmpegVideoSource&);
FFmpegVideoSource& operator =(const FFmpegVideoSource&);
VideoReader_GPU::FormatInfo format_;
InputMediaStream_FFMPEG* stream_;
std::auto_ptr<Thread> thread_;
volatile bool stop_;
volatile bool hasError_;
static void readLoop(void* userData);
};
}
}}
#endif // HAVE_CUDA
#endif // __FFMPEG_VIDEO_SOURCE_H__

File diff suppressed because it is too large Load Diff

View File

@@ -1,117 +1,117 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "frame_queue.h"
#if defined(HAVE_CUDA) && !defined(__APPLE__)
cv::gpu::detail::FrameQueue::FrameQueue() :
endOfDecode_(0),
framesInQueue_(0),
readPosition_(0)
{
std::memset(displayQueue_, 0, sizeof(displayQueue_));
std::memset((void*)isFrameInUse_, 0, sizeof(isFrameInUse_));
}
bool cv::gpu::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex)
{
while (isInUse(pictureIndex))
{
// Decoder is getting too far ahead of the display
Thread::sleep(1);
if (isEndOfDecode())
return false;
}
return true;
}
void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
{
// Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed
// for display
isFrameInUse_[picParams->picture_index] = true;
// Wait until we have a free entry in the display queue (should never block if we have enough entries)
do
{
bool isFramePlaced = false;
{
CriticalSection::AutoLock autoLock(criticalSection_);
if (framesInQueue_ < MaximumSize)
{
int writePosition = (readPosition_ + framesInQueue_) % MaximumSize;
displayQueue_[writePosition] = *picParams;
framesInQueue_++;
isFramePlaced = true;
}
}
if (isFramePlaced) // Done
break;
// Wait a bit
Thread::sleep(1);
} while (!isEndOfDecode());
}
bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
{
CriticalSection::AutoLock autoLock(criticalSection_);
if (framesInQueue_ > 0)
{
int entry = readPosition_;
displayInfo = displayQueue_[entry];
readPosition_ = (entry + 1) % MaximumSize;
framesInQueue_--;
return true;
}
return false;
}
#endif // HAVE_CUDA
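A hedged sketch of the producer/consumer pattern FrameQueue supports: the decoder thread reserves a picture slot and enqueues its display info, while the display thread drains the queue. Only methods implemented above are called; decodeNextPicture() and renderPicture() are hypothetical stand-ins for the NVCUVID decode and display steps.
// Hedged sketch of the two cooperating loops around cv::gpu::detail::FrameQueue.
#include "frame_queue.h"
bool decodeNextPicture(CUVIDPARSERDISPINFO& dispInfo);    // hypothetical: decodes one picture, fills picture_index
void renderPicture(const CUVIDPARSERDISPINFO& dispInfo);  // hypothetical: maps and displays the decoded picture
void decoderLoop(cv::gpu::detail::FrameQueue& queue)
{
    CUVIDPARSERDISPINFO dispInfo;
    while (decodeNextPicture(dispInfo))
    {
        // Block until the target slot is no longer referenced by the display side.
        if (!queue.waitUntilFrameAvailable(dispInfo.picture_index))
            break;                                        // end of decode was signalled while waiting
        queue.enqueue(&dispInfo);
    }
}
void displayLoop(cv::gpu::detail::FrameQueue& queue)
{
    CUVIDPARSERDISPINFO dispInfo;
    while (!queue.isEndOfDecode())
    {
        if (queue.dequeue(dispInfo))
            renderPicture(dispInfo);                      // a real consumer would also mark the slot free afterwards
    }
}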

Some files were not shown because too many files have changed in this diff Show More