moved device layer headers to include directory

This commit is contained in:
Vladislav Vinogradov
2012-10-05 18:04:23 +04:00
parent 3ebec7448d
commit ab3a5244ba
28 changed files with 10 additions and 10 deletions

View File

@@ -0,0 +1,714 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
namespace cv { namespace gpu { namespace device
{
//////////////////////////////////////////////////////////////
// BrdConstant
template <typename D> struct BrdRowConstant
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return x >= 0 ? saturate_cast<D>(data[x]) : val;
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return x < width ? saturate_cast<D>(data[x]) : val;
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
}
const int width;
const D val;
};
template <typename D> struct BrdColConstant
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
const int height;
const D val;
};
template <typename D> struct BrdConstant
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
{
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
const int height;
const int width;
const D val;
};
//////////////////////////////////////////////////////////////
// BrdReplicate
template <typename D> struct BrdRowReplicate
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
const int last_col;
};
template <typename D> struct BrdColReplicate
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
}
const int last_row;
};
template <typename D> struct BrdReplicate
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect101
template <typename D> struct BrdRowReflect101
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
const int last_col;
};
template <typename D> struct BrdColReflect101
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
const int last_row;
};
template <typename D> struct BrdReflect101
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect
template <typename D> struct BrdRowReflect
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (::abs(x) - (x < 0)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(::abs(x) - (x < 0));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
const int last_col;
};
template <typename D> struct BrdColReflect
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (::abs(y) - (y < 0)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(::abs(y) - (y < 0));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
const int last_row;
};
template <typename D> struct BrdReflect
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (::abs(y) - (y < 0)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (::abs(x) - (x < 0)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (last_col - ::abs(last_col - x) + (x > last_col));
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
};
//////////////////////////////////////////////////////////////
// BrdWrap
template <typename D> struct BrdRowWrap
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (x < width) * x + (x >= width) * (x % width);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(idx_col_low(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
const int width;
};
template <typename D> struct BrdColWrap
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return (y < height) * y + (y >= height) * (y % height);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(idx_row_low(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
const int height;
};
template <typename D> struct BrdWrap
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
height(height_), width(width_)
{
}
template <typename U>
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
height(height_), width(width_)
{
}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return (y < height) * y + (y >= height) * (y % height);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(idx_row_low(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (x < width) * x + (x >= width) * (x % width);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(idx_col_low(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int height;
const int width;
};
//////////////////////////////////////////////////////////////
// BorderReader
template <typename Ptr2D, typename B> struct BorderReader
{
typedef typename B::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
{
return b.at(y, x, ptr);
}
const Ptr2D ptr;
const B b;
};
// under win32 there is some bug with templated types that passed as kernel parameters
// with this specialization all works fine
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
typedef typename BrdConstant<D>::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
src(src_), height(b.height), width(b.width), val(b.val)
{
}
__device__ __forceinline__ D operator ()(index_type y, index_type x) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
const Ptr2D src;
const int height;
const int width;
const D val;
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__

View File

@@ -0,0 +1,221 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_COLOR_HPP__
#define __OPENCV_GPU_COLOR_HPP__
#include "detail/color_detail.hpp"
namespace cv { namespace gpu { namespace device
{
// All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
// {
// typedef ... functor_type;
// static __host__ __device__ functor_type create_functor();
// };
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
#undef OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
#undef OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
#undef OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS
OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
#undef OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
#undef OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS
OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
#undef OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 2)
#undef OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 2)
#undef OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
#undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__

View File

@@ -0,0 +1,114 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_COMMON_HPP__
#define __OPENCV_GPU_COMMON_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795
#endif
#ifndef CV_PI_F
#ifndef CV_PI
#define CV_PI_F 3.14159265f
#else
#define CV_PI_F ((float)CV_PI)
#endif
#endif
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#endif
namespace cv { namespace gpu
{
void error(const char *error_string, const char *file, const int line, const char *func);
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
return reinterpret_cast<size_t>(ptr) % size == 0;
}
static inline bool isAligned(size_t step, size_t size)
{
return step % size == 0;
}
}}
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
#ifdef __CUDACC__
namespace cv { namespace gpu
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
namespace device
{
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
typedef unsigned int uint;
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
}
}}
#endif // __CUDACC__
#endif // __OPENCV_GPU_COMMON_HPP__

View File

@@ -0,0 +1,105 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
// for Fermi memory space is detected automatically
template <typename T> struct ForceGlob
{
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
};
#else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define OPENCV_GPU_ASM_PTR "l"
#else
// 32-bit register modifier for inlined asm
#define OPENCV_GPU_ASM_PTR "r"
#endif
template<class T> struct ForceGlob;
#define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
} \
};
#define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
} \
};
OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar, u8)
OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar, s8)
OPENCV_GPU_DEFINE_FORCE_GLOB_B(char, b8)
OPENCV_GPU_DEFINE_FORCE_GLOB (ushort, u16, h)
OPENCV_GPU_DEFINE_FORCE_GLOB (short, s16, h)
OPENCV_GPU_DEFINE_FORCE_GLOB (uint, u32, r)
OPENCV_GPU_DEFINE_FORCE_GLOB (int, s32, r)
OPENCV_GPU_DEFINE_FORCE_GLOB (float, f32, f)
OPENCV_GPU_DEFINE_FORCE_GLOB (double, f64, d)
#undef OPENCV_GPU_DEFINE_FORCE_GLOB
#undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
#undef OPENCV_GPU_ASM_PTR
#endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,841 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_REDUCTION_DETAIL_HPP__
#define __OPENCV_GPU_REDUCTION_DETAIL_HPP__
namespace cv { namespace gpu { namespace device
{
namespace utility_detail
{
///////////////////////////////////////////////////////////////////////////////
// Reductor
template <int n> struct WarpReductor
{
template <typename T, typename Op> static __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
if (tid < n)
data[tid] = partial_reduction;
if (n > 32) __syncthreads();
if (n > 32)
{
if (tid < n - 32)
data[tid] = partial_reduction = op(partial_reduction, data[tid + 32]);
if (tid < 16)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
}
}
else if (n > 16)
{
if (tid < n - 16)
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
if (tid < 8)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
}
}
else if (n > 8)
{
if (tid < n - 8)
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]);
if (tid < 4)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
}
}
else if (n > 4)
{
if (tid < n - 4)
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
if (tid < 2)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
}
}
else if (n > 2)
{
if (tid < n - 2)
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
if (tid < 2)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
}
}
}
};
template <> struct WarpReductor<64>
{
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
data[tid] = partial_reduction;
__syncthreads();
if (tid < 32)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 32]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
}
}
};
template <> struct WarpReductor<32>
{
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
data[tid] = partial_reduction;
if (tid < 16)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
}
}
};
template <> struct WarpReductor<16>
{
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
data[tid] = partial_reduction;
if (tid < 8)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
}
}
};
template <> struct WarpReductor<8>
{
template <typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
data[tid] = partial_reduction;
if (tid < 4)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
}
}
};
template <bool warp> struct ReductionDispatcher;
template <> struct ReductionDispatcher<true>
{
template <int n, typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
WarpReductor<n>::reduce(data, partial_reduction, tid, op);
}
};
template <> struct ReductionDispatcher<false>
{
template <int n, typename T, typename Op> static __device__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
if (tid < n)
data[tid] = partial_reduction;
__syncthreads();
if (n == 512) { if (tid < 256) { data[tid] = partial_reduction = op(partial_reduction, data[tid + 256]); } __syncthreads(); }
if (n >= 256) { if (tid < 128) { data[tid] = partial_reduction = op(partial_reduction, data[tid + 128]); } __syncthreads(); }
if (n >= 128) { if (tid < 64) { data[tid] = partial_reduction = op(partial_reduction, data[tid + 64]); } __syncthreads(); }
if (tid < 32)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 32]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
}
}
};
///////////////////////////////////////////////////////////////////////////////
// PredValWarpReductor
template <int n> struct PredValWarpReductor;
template <> struct PredValWarpReductor<64>
{
template <typename T, typename V, typename Pred>
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred)
{
if (tid < 32)
{
myData = sdata[tid];
myVal = sval[tid];
T reg = sdata[tid + 32];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 32];
}
reg = sdata[tid + 16];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 16];
}
reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 8];
}
reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 1];
}
}
}
};
template <> struct PredValWarpReductor<32>
{
template <typename T, typename V, typename Pred>
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred)
{
if (tid < 16)
{
myData = sdata[tid];
myVal = sval[tid];
T reg = sdata[tid + 16];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 16];
}
reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 8];
}
reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 1];
}
}
}
};
template <> struct PredValWarpReductor<16>
{
template <typename T, typename V, typename Pred>
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred)
{
if (tid < 8)
{
myData = sdata[tid];
myVal = sval[tid];
T reg = reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 8];
}
reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 1];
}
}
}
};
template <> struct PredValWarpReductor<8>
{
template <typename T, typename V, typename Pred>
static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred)
{
if (tid < 4)
{
myData = sdata[tid];
myVal = sval[tid];
T reg = reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 1];
}
}
}
};
template <bool warp> struct PredValReductionDispatcher;
template <> struct PredValReductionDispatcher<true>
{
template <int n, typename T, typename V, typename Pred> static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred)
{
PredValWarpReductor<n>::reduce(myData, myVal, sdata, sval, tid, pred);
}
};
template <> struct PredValReductionDispatcher<false>
{
template <int n, typename T, typename V, typename Pred> static __device__ void reduce(T& myData, V& myVal, volatile T* sdata, V* sval, int tid, const Pred& pred)
{
myData = sdata[tid];
myVal = sval[tid];
if (n >= 512 && tid < 256)
{
T reg = sdata[tid + 256];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 256];
}
__syncthreads();
}
if (n >= 256 && tid < 128)
{
T reg = sdata[tid + 128];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 128];
}
__syncthreads();
}
if (n >= 128 && tid < 64)
{
T reg = sdata[tid + 64];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 64];
}
__syncthreads();
}
if (tid < 32)
{
if (n >= 64)
{
T reg = sdata[tid + 32];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 32];
}
}
if (n >= 32)
{
T reg = sdata[tid + 16];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 16];
}
}
if (n >= 16)
{
T reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 8];
}
}
if (n >= 8)
{
T reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 4];
}
}
if (n >= 4)
{
T reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 2];
}
}
if (n >= 2)
{
T reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval[tid] = myVal = sval[tid + 1];
}
}
}
}
};
///////////////////////////////////////////////////////////////////////////////
// PredVal2WarpReductor
template <int n> struct PredVal2WarpReductor;
template <> struct PredVal2WarpReductor<64>
{
template <typename T, typename V1, typename V2, typename Pred>
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred)
{
if (tid < 32)
{
myData = sdata[tid];
myVal1 = sval1[tid];
myVal2 = sval2[tid];
T reg = sdata[tid + 32];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 32];
sval2[tid] = myVal2 = sval2[tid + 32];
}
reg = sdata[tid + 16];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 16];
sval2[tid] = myVal2 = sval2[tid + 16];
}
reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 8];
sval2[tid] = myVal2 = sval2[tid + 8];
}
reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 4];
sval2[tid] = myVal2 = sval2[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 2];
sval2[tid] = myVal2 = sval2[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 1];
sval2[tid] = myVal2 = sval2[tid + 1];
}
}
}
};
template <> struct PredVal2WarpReductor<32>
{
template <typename T, typename V1, typename V2, typename Pred>
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred)
{
if (tid < 16)
{
myData = sdata[tid];
myVal1 = sval1[tid];
myVal2 = sval2[tid];
T reg = sdata[tid + 16];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 16];
sval2[tid] = myVal2 = sval2[tid + 16];
}
reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 8];
sval2[tid] = myVal2 = sval2[tid + 8];
}
reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 4];
sval2[tid] = myVal2 = sval2[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 2];
sval2[tid] = myVal2 = sval2[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 1];
sval2[tid] = myVal2 = sval2[tid + 1];
}
}
}
};
template <> struct PredVal2WarpReductor<16>
{
template <typename T, typename V1, typename V2, typename Pred>
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred)
{
if (tid < 8)
{
myData = sdata[tid];
myVal1 = sval1[tid];
myVal2 = sval2[tid];
T reg = reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 8];
sval2[tid] = myVal2 = sval2[tid + 8];
}
reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 4];
sval2[tid] = myVal2 = sval2[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 2];
sval2[tid] = myVal2 = sval2[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 1];
sval2[tid] = myVal2 = sval2[tid + 1];
}
}
}
};
template <> struct PredVal2WarpReductor<8>
{
template <typename T, typename V1, typename V2, typename Pred>
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred)
{
if (tid < 4)
{
myData = sdata[tid];
myVal1 = sval1[tid];
myVal2 = sval2[tid];
T reg = reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 4];
sval2[tid] = myVal2 = sval2[tid + 4];
}
reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 2];
sval2[tid] = myVal2 = sval2[tid + 2];
}
reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 1];
sval2[tid] = myVal2 = sval2[tid + 1];
}
}
}
};
template <bool warp> struct PredVal2ReductionDispatcher;
template <> struct PredVal2ReductionDispatcher<true>
{
template <int n, typename T, typename V1, typename V2, typename Pred>
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred)
{
PredVal2WarpReductor<n>::reduce(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
}
};
template <> struct PredVal2ReductionDispatcher<false>
{
template <int n, typename T, typename V1, typename V2, typename Pred>
static __device__ void reduce(T& myData, V1& myVal1, V2& myVal2, volatile T* sdata, V1* sval1, V2* sval2, int tid, const Pred& pred)
{
myData = sdata[tid];
myVal1 = sval1[tid];
myVal2 = sval2[tid];
if (n >= 512 && tid < 256)
{
T reg = sdata[tid + 256];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 256];
sval2[tid] = myVal2 = sval2[tid + 256];
}
__syncthreads();
}
if (n >= 256 && tid < 128)
{
T reg = sdata[tid + 128];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 128];
sval2[tid] = myVal2 = sval2[tid + 128];
}
__syncthreads();
}
if (n >= 128 && tid < 64)
{
T reg = sdata[tid + 64];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 64];
sval2[tid] = myVal2 = sval2[tid + 64];
}
__syncthreads();
}
if (tid < 32)
{
if (n >= 64)
{
T reg = sdata[tid + 32];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 32];
sval2[tid] = myVal2 = sval2[tid + 32];
}
}
if (n >= 32)
{
T reg = sdata[tid + 16];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 16];
sval2[tid] = myVal2 = sval2[tid + 16];
}
}
if (n >= 16)
{
T reg = sdata[tid + 8];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 8];
sval2[tid] = myVal2 = sval2[tid + 8];
}
}
if (n >= 8)
{
T reg = sdata[tid + 4];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 4];
sval2[tid] = myVal2 = sval2[tid + 4];
}
}
if (n >= 4)
{
T reg = sdata[tid + 2];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 2];
sval2[tid] = myVal2 = sval2[tid + 2];
}
}
if (n >= 2)
{
T reg = sdata[tid + 1];
if (pred(reg, myData))
{
sdata[tid] = myData = reg;
sval1[tid] = myVal1 = sval1[tid + 1];
sval2[tid] = myVal2 = sval2[tid + 1];
}
}
}
}
};
} // namespace utility_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_REDUCTION_DETAIL_HPP__

View File

@@ -0,0 +1,395 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
#define __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
#include "../common.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace transform_detail
{
//! Read Write Traits
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
typedef typename TypeVec<T, shift>::vec_type read_type;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
typedef typename TypeVec<T1, shift>::vec_type read_type1;
typedef typename TypeVec<T2, shift>::vec_type read_type2;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
//! Transform kernels
template <int shift> struct OpUnroller;
template <> struct OpUnroller<1>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
}
};
template <> struct OpUnroller<2>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
}
};
template <> struct OpUnroller<3>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
};
template <> struct OpUnroller<4>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
template <> struct OpUnroller<8>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src.a7);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src1.a0, src2.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src1.a1, src2.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src1.a2, src2.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src1.a3, src2.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src1.a4, src2.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src1.a5, src2.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src1.a6, src2.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src1.a7, src2.a7);
}
};
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
typedef TransformFunctorTraits<UnOp> ft;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src_.cols)
{
const read_type src_n_el = ((const read_type*)src)[x];
write_type dst_n_el = ((const write_type*)dst)[x];
OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
dst.ptr(y)[x] = op(src.ptr(y)[x]);
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
const Mask mask, const BinOp op)
{
typedef TransformFunctorTraits<BinOp> ft;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
{
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
write_type dst_n_el = ((const write_type*)dst)[x];
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
const Mask mask, const BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
const T1 src1_data = src1.ptr(y)[x];
const T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
template <bool UseSmart> struct TransformDispatcher;
template<> struct TransformDispatcher<false>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
StaticAssert<ft::smart_shift != 1>::check();
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__

View File

@@ -0,0 +1,187 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
#define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
#include "../common.hpp"
#include "../vec_traits.hpp"
namespace cv { namespace gpu { namespace device
{
namespace type_traits_detail
{
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
template <> struct IsIntegral<char> { enum {value = 1}; };
template <> struct IsIntegral<bool> { enum {value = 1}; };
template <typename T> struct IsFloat { enum {value = 0}; };
template <> struct IsFloat<float> { enum {value = 1}; };
template <> struct IsFloat<double> { enum {value = 1}; };
template <typename T> struct IsVec { enum {value = 0}; };
template <> struct IsVec<uchar1> { enum {value = 1}; };
template <> struct IsVec<uchar2> { enum {value = 1}; };
template <> struct IsVec<uchar3> { enum {value = 1}; };
template <> struct IsVec<uchar4> { enum {value = 1}; };
template <> struct IsVec<uchar8> { enum {value = 1}; };
template <> struct IsVec<char1> { enum {value = 1}; };
template <> struct IsVec<char2> { enum {value = 1}; };
template <> struct IsVec<char3> { enum {value = 1}; };
template <> struct IsVec<char4> { enum {value = 1}; };
template <> struct IsVec<char8> { enum {value = 1}; };
template <> struct IsVec<ushort1> { enum {value = 1}; };
template <> struct IsVec<ushort2> { enum {value = 1}; };
template <> struct IsVec<ushort3> { enum {value = 1}; };
template <> struct IsVec<ushort4> { enum {value = 1}; };
template <> struct IsVec<ushort8> { enum {value = 1}; };
template <> struct IsVec<short1> { enum {value = 1}; };
template <> struct IsVec<short2> { enum {value = 1}; };
template <> struct IsVec<short3> { enum {value = 1}; };
template <> struct IsVec<short4> { enum {value = 1}; };
template <> struct IsVec<short8> { enum {value = 1}; };
template <> struct IsVec<uint1> { enum {value = 1}; };
template <> struct IsVec<uint2> { enum {value = 1}; };
template <> struct IsVec<uint3> { enum {value = 1}; };
template <> struct IsVec<uint4> { enum {value = 1}; };
template <> struct IsVec<uint8> { enum {value = 1}; };
template <> struct IsVec<int1> { enum {value = 1}; };
template <> struct IsVec<int2> { enum {value = 1}; };
template <> struct IsVec<int3> { enum {value = 1}; };
template <> struct IsVec<int4> { enum {value = 1}; };
template <> struct IsVec<int8> { enum {value = 1}; };
template <> struct IsVec<float1> { enum {value = 1}; };
template <> struct IsVec<float2> { enum {value = 1}; };
template <> struct IsVec<float3> { enum {value = 1}; };
template <> struct IsVec<float4> { enum {value = 1}; };
template <> struct IsVec<float8> { enum {value = 1}; };
template <> struct IsVec<double1> { enum {value = 1}; };
template <> struct IsVec<double2> { enum {value = 1}; };
template <> struct IsVec<double3> { enum {value = 1}; };
template <> struct IsVec<double4> { enum {value = 1}; };
template <> struct IsVec<double8> { enum {value = 1}; };
template <class U> struct AddParameterType { typedef const U& type; };
template <class U> struct AddParameterType<U&> { typedef U& type; };
template <> struct AddParameterType<void> { typedef void type; };
template <class U> struct ReferenceTraits
{
enum { value = false };
typedef U type;
};
template <class U> struct ReferenceTraits<U&>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits
{
enum { value = false };
typedef void type;
};
template <class U> struct PointerTraits<U*>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits<U*&>
{
enum { value = true };
typedef U type;
};
template <class U> struct UnConst
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnConst<const U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnConst<const U&>
{
typedef U& type;
enum { value = 1 };
};
template <class U> struct UnVolatile
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnVolatile<volatile U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnVolatile<volatile U&>
{
typedef U& type;
enum { value = 1 };
};
} // namespace type_traits_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__

View File

@@ -0,0 +1,117 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
#include "../datamov_utils.hpp"
namespace cv { namespace gpu { namespace device
{
namespace vec_distance_detail
{
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
template <typename Dist, typename T1, typename T2>
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
{
if (ind < len)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, ind, val2);
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
}
}
template <typename Dist, typename T1, typename T2>
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, 0, val2);
vecGlob += THREAD_DIM;
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
}
};
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
{
}
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
{
}
};
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
}
};
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
}
};
} // namespace vec_distance_detail
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__

View File

@@ -0,0 +1,80 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
namespace cv { namespace gpu { namespace device
{
template<class T> struct DynamicSharedMem
{
__device__ __forceinline__ operator T*()
{
extern __shared__ int __smem[];
return (T*)__smem;
}
__device__ __forceinline__ operator const T*() const
{
extern __shared__ int __smem[];
return (T*)__smem;
}
};
// specialize for double to avoid unaligned memory access compile errors
template<> struct DynamicSharedMem<double>
{
__device__ __forceinline__ operator double*()
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
__device__ __forceinline__ operator const double*() const
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
};
}}}
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__

View File

@@ -0,0 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_GPU_EMULATION_HPP_
#define OPENCV_GPU_EMULATION_HPP_
#include "warp_reduce.hpp"
#include <stdio.h>
namespace cv { namespace gpu { namespace device
{
struct Emulation
{
static __device__ __forceinline__ int syncthreadsOr(int pred)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// just campilation stab
return 0;
#else
return __syncthreads_or(pred);
#endif
}
template<int CTA_SIZE>
static __forceinline__ __device__ int Ballot(int predicate)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
return __ballot(predicate);
#else
__shared__ volatile int cta_buffer[CTA_SIZE];
int tid = threadIdx.x;
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
return warp_reduce(cta_buffer);
#endif
}
struct smem
{
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
template<typename T>
static __device__ __forceinline__ T atomicInc(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + 1);
*address = count;
} while (*address != count);
return (count & TAG_MASK) - 1;
#else
return ::atomicInc(address, val);
#endif
}
template<typename T>
static __device__ __forceinline__ T atomicAdd(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + val);
*address = count;
} while (*address != count);
return (count & TAG_MASK) - val;
#else
return ::atomicAdd(address, val);
#endif
}
template<typename T>
static __device__ __forceinline__ T atomicMin(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count = ::min(*address, val);
do
{
*address = count;
} while (*address > count);
return count;
#else
return ::atomicMin(address, val);
#endif
}
};
};
}}} // namespace cv { namespace gpu { namespace device
#endif /* OPENCV_GPU_EMULATION_HPP_ */

View File

@@ -0,0 +1,278 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_FILTERS_HPP__
#define __OPENCV_GPU_FILTERS_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
#include "type_traits.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename Ptr2D> struct PointFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
return src(__float2int_rz(y), __float2int_rz(x));
}
const Ptr2D src;
};
template <typename Ptr2D> struct LinearFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(x);
const int y1 = __float2int_rd(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
elem_type src_reg = src(y1, x1);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = src(y1, x2);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = src(y2, x1);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = src(y2, x2);
out = out + src_reg * ((x - x1) * (y - y1));
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
};
template <typename Ptr2D> struct CubicFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
float x = fabsf(x_);
if (x <= 1.0f)
{
return x * x * (1.5f * x - 2.5f) + 1.0f;
}
else if (x < 2.0f)
{
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
}
else
{
return 0.0f;
}
}
__device__ elem_type operator ()(float y, float x) const
{
const float xmin = ::ceilf(x - 2.0f);
const float xmax = ::floorf(x + 2.0f);
const float ymin = ::ceilf(y - 2.0f);
const float ymax = ::floorf(y + 2.0f);
work_type sum = VecTraits<work_type>::all(0);
float wsum = 0.0f;
for (float cy = ymin; cy <= ymax; cy += 1.0f)
{
for (float cx = xmin; cx <= xmax; cx += 1.0f)
{
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
wsum += w;
}
}
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
return saturate_cast<elem_type>(res);
}
const Ptr2D src;
};
// for integer scaling
template <typename Ptr2D> struct IntegerAreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for(int dy = sy1; dy < sy2; ++dy)
for(int dx = sx1; dx < sx2; ++dx)
{
out = out + src(dy, dx) * scale;
}
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
float scale_x, scale_y ,scale;
};
template <typename Ptr2D> struct AreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for (int dy = sy1; dy < sy2; ++dy)
{
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(dy, dx) * scale;
if (sx1 > fsx1)
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
if (sx2 < fsx2)
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
}
if (sy1 > fsy1)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
if (sy2 < fsy2)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
if ((sy1 > fsy1) && (sx1 > fsx1))
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
if ((sy1 > fsy1) && (sx2 < fsx2))
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx2 < fsx2))
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx1 > fsx1))
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
float scale_x, scale_y;
int width, haight;
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_FILTERS_HPP__

View File

@@ -0,0 +1,72 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
#define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
#include <cstdio>
namespace cv { namespace gpu { namespace device
{
template<class Func>
void printFuncAttrib(Func& func)
{
cudaFuncAttributes attrs;
cudaFuncGetAttributes(&attrs, func);
printf("=== Function stats ===\n");
printf("Name: \n");
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
printf("numRegs = %d\n", attrs.numRegs);
printf("ptxVersion = %d\n", attrs.ptxVersion);
printf("binaryVersion = %d\n", attrs.binaryVersion);
printf("\n");
fflush(stdout);
}
}}} // namespace cv { namespace gpu { namespace device
#endif /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */

View File

@@ -0,0 +1,687 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_FUNCTIONAL_HPP__
#define __OPENCV_GPU_FUNCTIONAL_HPP__
#include <thrust/functional.h>
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "type_traits.hpp"
#include "device_functions.h"
namespace cv { namespace gpu { namespace device
{
// Function Objects
using thrust::unary_function;
using thrust::binary_function;
// Arithmetic Operations
template <typename T> struct plus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a + b;
}
__device__ __forceinline__ plus(const plus& other):binary_function<T,T,T>(){}
__device__ __forceinline__ plus():binary_function<T,T,T>(){}
};
template <typename T> struct minus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a - b;
}
__device__ __forceinline__ minus(const minus& other):binary_function<T,T,T>(){}
__device__ __forceinline__ minus():binary_function<T,T,T>(){}
};
template <typename T> struct multiplies : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a * b;
}
__device__ __forceinline__ multiplies(const multiplies& other):binary_function<T,T,T>(){}
__device__ __forceinline__ multiplies():binary_function<T,T,T>(){}
};
template <typename T> struct divides : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a / b;
}
__device__ __forceinline__ divides(const divides& other):binary_function<T,T,T>(){}
__device__ __forceinline__ divides():binary_function<T,T,T>(){}
};
template <typename T> struct modulus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a % b;
}
__device__ __forceinline__ modulus(const modulus& other):binary_function<T,T,T>(){}
__device__ __forceinline__ modulus():binary_function<T,T,T>(){}
};
template <typename T> struct negate : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
{
return -a;
}
__device__ __forceinline__ negate(const negate& other):unary_function<T,T>(){}
__device__ __forceinline__ negate():unary_function<T,T>(){}
};
// Comparison Operations
template <typename T> struct equal_to : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a == b;
}
__device__ __forceinline__ equal_to(const equal_to& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ equal_to():binary_function<T,T,bool>(){}
};
template <typename T> struct not_equal_to : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a != b;
}
__device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ not_equal_to():binary_function<T,T,bool>(){}
};
template <typename T> struct greater : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a > b;
}
__device__ __forceinline__ greater(const greater& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ greater():binary_function<T,T,bool>(){}
};
template <typename T> struct less : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a < b;
}
__device__ __forceinline__ less(const less& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ less():binary_function<T,T,bool>(){}
};
template <typename T> struct greater_equal : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a >= b;
}
__device__ __forceinline__ greater_equal(const greater_equal& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ greater_equal():binary_function<T,T,bool>(){}
};
template <typename T> struct less_equal : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a <= b;
}
__device__ __forceinline__ less_equal(const less_equal& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ less_equal():binary_function<T,T,bool>(){}
};
// Logical Operations
template <typename T> struct logical_and : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a && b;
}
__device__ __forceinline__ logical_and(const logical_and& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ logical_and():binary_function<T,T,bool>(){}
};
template <typename T> struct logical_or : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a || b;
}
__device__ __forceinline__ logical_or(const logical_or& other):binary_function<T,T,bool>(){}
__device__ __forceinline__ logical_or():binary_function<T,T,bool>(){}
};
template <typename T> struct logical_not : unary_function<T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
{
return !a;
}
__device__ __forceinline__ logical_not(const logical_not& other):unary_function<T,bool>(){}
__device__ __forceinline__ logical_not():unary_function<T,bool>(){}
};
// Bitwise Operations
template <typename T> struct bit_and : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a & b;
}
__device__ __forceinline__ bit_and(const bit_and& other):binary_function<T,T,T>(){}
__device__ __forceinline__ bit_and():binary_function<T,T,T>(){}
};
template <typename T> struct bit_or : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a | b;
}
__device__ __forceinline__ bit_or(const bit_or& other):binary_function<T,T,T>(){}
__device__ __forceinline__ bit_or():binary_function<T,T,T>(){}
};
template <typename T> struct bit_xor : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a ^ b;
}
__device__ __forceinline__ bit_xor(const bit_xor& other):binary_function<T,T,T>(){}
__device__ __forceinline__ bit_xor():binary_function<T,T,T>(){}
};
template <typename T> struct bit_not : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
{
return ~v;
}
__device__ __forceinline__ bit_not(const bit_not& other):unary_function<T,T>(){}
__device__ __forceinline__ bit_not():unary_function<T,T>(){}
};
// Generalized Identity Operations
template <typename T> struct identity : unary_function<T, T>
{
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
{
return x;
}
__device__ __forceinline__ identity(const identity& other):unary_function<T,T>(){}
__device__ __forceinline__ identity():unary_function<T,T>(){}
};
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
{
return lhs;
}
__device__ __forceinline__ project1st(const project1st& other):binary_function<T1,T2,T1>(){}
__device__ __forceinline__ project1st():binary_function<T1,T2,T1>(){}
};
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
{
return rhs;
}
__device__ __forceinline__ project2nd(const project2nd& other):binary_function<T1,T2,T2>(){}
__device__ __forceinline__ project2nd():binary_function<T1,T2,T2>(){}
};
// Min/Max Operations
#define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \
template <> struct name<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
__device__ __forceinline__ name(const name& other):binary_function<type, type, type>(){}\
__device__ __forceinline__ name():binary_function<type, type, type>(){}\
};
template <typename T> struct maximum : binary_function<T, T, T>
{
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
{
return lhs < rhs ? rhs : lhs;
}
__device__ __forceinline__ maximum(const maximum& other):binary_function<T, T, T>(){}
__device__ __forceinline__ maximum():binary_function<T, T, T>(){}
};
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, schar, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, char, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, ushort, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, short, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, int, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uint, ::max)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, ::fmax)
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, ::fmax)
template <typename T> struct minimum : binary_function<T, T, T>
{
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
{
return lhs < rhs ? lhs : rhs;
}
__device__ __forceinline__ minimum(const minimum& other):binary_function<T, T, T>(){}
__device__ __forceinline__ minimum():binary_function<T, T, T>(){}
};
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, schar, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, char, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, ushort, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, short, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, int, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uint, ::min)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, float, ::fmin)
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, ::fmin)
#undef OPENCV_GPU_IMPLEMENT_MINMAX
// Math functions
///bound=========================================
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : unary_function<T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
{ \
return func ## f(v); \
} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
__device__ __forceinline__ double operator ()(double v) const \
{ \
return func(v); \
} \
};
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : binary_function<T, T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
{ \
return func ## f(v1, v2); \
} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
__device__ __forceinline__ double operator ()(double v1, double v2) const \
{ \
return func(v1, v2); \
} \
};
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(fabs, ::fabs)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log, ::log)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
#undef OPENCV_GPU_IMPLEMENT_UN_FUNCTOR
#undef OPENCV_GPU_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
#undef OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
{
return src1 * src1 + src2 * src2;
}
__device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function<T, T, float>(){}
__device__ __forceinline__ hypot_sqr_func() : binary_function<T, T, float>(){}
};
// Saturate Cast Functor
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
{
return saturate_cast<D>(v);
}
__device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function<T, D>(){}
__device__ __forceinline__ saturate_cast_func():unary_function<T, D>(){}
};
// Threshold Functors
template <typename T> struct thresh_binary_func : unary_function<T, T>
{
__host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src > thresh) * maxVal;
}
__device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
: unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
__device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
const T thresh;
const T maxVal;
};
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
{
__host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src <= thresh) * maxVal;
}
__device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
: unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
__device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
const T thresh;
const T maxVal;
};
template <typename T> struct thresh_trunc_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return minimum<T>()(src, thresh);
}
__device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
: unary_function<T, T>(), thresh(other.thresh){}
__device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
const T thresh;
};
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src > thresh) * src;
}
__device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
: unary_function<T, T>(), thresh(other.thresh){}
__device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
const T thresh;
};
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src <= thresh) * src;
}
__device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
: unary_function<T, T>(), thresh(other.thresh){}
__device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
const T thresh;
};
//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
// Function Object Adaptors
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
{
return !pred(x);
}
__device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
__device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
const Predicate pred;
};
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
{
return unary_negate<Predicate>(pred);
}
template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
{
explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
{
return !pred(x,y);
}
__device__ __forceinline__ binary_negate(const binary_negate& other)
: binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
__device__ __forceinline__ binary_negate() :
binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
const Predicate pred;
};
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
{
return binary_negate<BinaryPredicate>(pred);
}
template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
{
__host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
__device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
{
return op(arg1, a);
}
__device__ __forceinline__ binder1st(const binder1st& other) :
unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
const Op op;
const typename Op::first_argument_type arg1;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
{
return binder1st<Op>(op, typename Op::first_argument_type(x));
}
template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
{
__host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
__forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
{
return op(a, arg2);
}
__device__ __forceinline__ binder2nd(const binder2nd& other) :
unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
const Op op;
const typename Op::second_argument_type arg2;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
{
return binder2nd<Op>(op, typename Op::second_argument_type(x));
}
// Functor Traits
template <typename F> struct IsUnaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T, typename D> static Yes check(unary_function<T, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
template <typename F> struct IsBinaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
namespace functional_detail
{
template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
template <typename T, typename D> struct DefaultUnaryShift
{
enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
};
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
template <typename T1, typename T2, typename D> struct DefaultBinaryShift
{
enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
};
template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
template <typename Func> struct ShiftDispatcher<Func, true>
{
enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
};
template <typename Func> struct ShiftDispatcher<Func, false>
{
enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
};
}
template <typename Func> struct DefaultTransformShift
{
enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
};
template <typename Func> struct DefaultTransformFunctorTraits
{
enum { simple_block_dim_x = 16 };
enum { simple_block_dim_y = 16 };
enum { smart_block_dim_x = 16 };
enum { smart_block_dim_y = 16 };
enum { smart_shift = DefaultTransformShift<Func>::shift };
};
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
#define OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(type) \
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_FUNCTIONAL_HPP__

View File

@@ -0,0 +1,235 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
#define __OPENCV_GPU_LIMITS_GPU_HPP__
#include <limits>
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<class T> struct numeric_limits
{
typedef T type;
__device__ __forceinline__ static type min() { return type(); };
__device__ __forceinline__ static type max() { return type(); };
__device__ __forceinline__ static type epsilon() { return type(); }
__device__ __forceinline__ static type round_error() { return type(); }
__device__ __forceinline__ static type denorm_min() { return type(); }
__device__ __forceinline__ static type infinity() { return type(); }
__device__ __forceinline__ static type quiet_NaN() { return type(); }
__device__ __forceinline__ static type signaling_NaN() { return T(); }
static const bool is_signed;
};
template<> struct numeric_limits<bool>
{
typedef bool type;
__device__ __forceinline__ static type min() { return false; };
__device__ __forceinline__ static type max() { return true; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<char>
{
typedef char type;
__device__ __forceinline__ static type min() { return CHAR_MIN; };
__device__ __forceinline__ static type max() { return CHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (char)-1 == -1;
};
template<> struct numeric_limits<signed char>
{
typedef char type;
__device__ __forceinline__ static type min() { return SCHAR_MIN; };
__device__ __forceinline__ static type max() { return SCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = (signed char)-1 == -1;
};
template<> struct numeric_limits<unsigned char>
{
typedef unsigned char type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UCHAR_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<short>
{
typedef short type;
__device__ __forceinline__ static type min() { return SHRT_MIN; };
__device__ __forceinline__ static type max() { return SHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned short>
{
typedef unsigned short type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return USHRT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<int>
{
typedef int type;
__device__ __forceinline__ static type min() { return INT_MIN; };
__device__ __forceinline__ static type max() { return INT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned int>
{
typedef unsigned int type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return UINT_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<long>
{
typedef long type;
__device__ __forceinline__ static type min() { return LONG_MIN; };
__device__ __forceinline__ static type max() { return LONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<unsigned long>
{
typedef unsigned long type;
__device__ __forceinline__ static type min() { return 0; };
__device__ __forceinline__ static type max() { return ULONG_MAX; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = false;
};
template<> struct numeric_limits<float>
{
typedef float type;
__device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
__device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; };
__device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; };
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
template<> struct numeric_limits<double>
{
typedef double type;
__device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
__device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; };
__device__ __forceinline__ static type epsilon();
__device__ __forceinline__ static type round_error();
__device__ __forceinline__ static type denorm_min();
__device__ __forceinline__ static type infinity();
__device__ __forceinline__ static type quiet_NaN();
__device__ __forceinline__ static type signaling_NaN();
static const bool is_signed = true;
};
}}} // namespace cv { namespace gpu { namespace device {
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__

View File

@@ -0,0 +1,216 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_SATURATE_CAST_HPP__
#define __OPENCV_GPU_SATURATE_CAST_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
return (uchar) ::max((int)v, 0);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
return (uchar) ::min((uint)v, (uint)UCHAR_MAX);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
return (uchar) ::min(v, (uint)UCHAR_MAX);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
return saturate_cast<uchar>((uint)v);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<uchar>(iv);
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<uchar>(iv);
#else
return saturate_cast<uchar>((float)v);
#endif
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
return (schar) ::min((int)v, SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
return (schar) ::min((uint)v, (uint)SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
return saturate_cast<schar>((int)v);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
return (schar) ::min(v, (uint)SCHAR_MAX);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<schar>(iv);
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<schar>(iv);
#else
return saturate_cast<schar>((float)v);
#endif
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
return (ushort) ::max((int)v, 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
return (ushort) ::max((int)v, 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
return (ushort) ::min(v, (uint)USHRT_MAX);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<ushort>(iv);
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<ushort>(iv);
#else
return saturate_cast<ushort>((float)v);
#endif
}
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
return (short) ::min((int)v, SHRT_MAX);
}
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
{
return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN);
}
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
return (short) ::min(v, (uint)SHRT_MAX);
}
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
{
int iv = __float2int_rn(v);
return saturate_cast<short>(iv);
}
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
int iv = __double2int_rn(v);
return saturate_cast<short>(iv);
#else
return saturate_cast<short>((float)v);
#endif
}
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
{
return __float2int_rn(v);
}
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2int_rn(v);
#else
return saturate_cast<int>((float)v);
#endif
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
return __float2uint_rn(v);
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2uint_rn(v);
#else
return saturate_cast<uint>((float)v);
#endif
}
}}}
#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */

View File

@@ -0,0 +1,171 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_SCAN_HPP__
#define __OPENCV_GPU_SCAN_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
template <ScanKind Kind, typename T, typename F> struct WarpScan
{
__device__ __forceinline__ WarpScan() {}
__device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
const unsigned int lane = idx & 31;
F op;
if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
if( Kind == INCLUSIVE )
return ptr [idx];
else
return (lane > 0) ? ptr [idx - 1] : 0;
}
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return tid;
}
__device__ __forceinline__ void init(volatile T *ptr){}
static const int warp_offset = 0;
typedef WarpScan<INCLUSIVE, T, F> merge;
};
template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
{
__device__ __forceinline__ WarpScanNoComp() {}
__device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
const unsigned int lane = threadIdx.x & 31;
F op;
ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
if( Kind == INCLUSIVE )
return ptr [idx];
else
return (lane > 0) ? ptr [idx - 1] : 0;
}
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
}
__device__ __forceinline__ void init(volatile T *ptr)
{
ptr[threadIdx.x] = 0;
}
static const int warp_smem_stride = 32 + 16 + 1;
static const int warp_offset = 16;
static const int warp_log = 5;
static const int warp_mask = 31;
typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
};
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
{
__device__ __forceinline__ BlockScan() {}
__device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }
__device__ __forceinline__ T operator()(volatile T *ptr)
{
const unsigned int tid = threadIdx.x;
const unsigned int lane = tid & warp_mask;
const unsigned int warp = tid >> warp_log;
Sc scan;
typename Sc::merge merge_scan;
const unsigned int idx = scan.index(tid);
T val = scan(ptr, idx);
__syncthreads ();
if( warp == 0)
scan.init(ptr);
__syncthreads ();
if( lane == 31 )
ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
__syncthreads ();
if( warp == 0 )
merge_scan(ptr, idx);
__syncthreads();
if ( warp > 0)
val = ptr [scan.warp_offset + warp - 1] + val;
__syncthreads ();
ptr[idx] = val;
__syncthreads ();
return val ;
}
static const int warp_log = 5;
static const int warp_mask = 31;
};
}}}
#endif // __OPENCV_GPU_SCAN_HPP__

View File

@@ -0,0 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__
#define __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__
#if defined(__CUDACC__)
#define __OPENCV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
#define __OPENCV_GPU_HOST_DEVICE__
#endif
namespace cv { namespace gpu
{
namespace device
{
template<bool expr> struct Static {};
template<> struct Static<true>
{
__OPENCV_GPU_HOST_DEVICE__ static void check() {};
};
}
}}
#undef __OPENCV_GPU_HOST_DEVICE__
#endif /* __OPENCV_GPU_GPU_DEVICE_STATIC_CHECK_HPP__ */

View File

@@ -0,0 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "common.hpp"
#include "utility.hpp"
#include "detail/transform_detail.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
}
}}}
#endif // __OPENCV_GPU_TRANSFORM_HPP__

View File

@@ -0,0 +1,82 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__
#define __OPENCV_GPU_TYPE_TRAITS_HPP__
#include "detail/type_traits_detail.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T> struct IsSimpleParameter
{
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
};
template <typename T> struct TypeTraits
{
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
enum { isConst = type_traits_detail::UnConst<T>::value };
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
enum { isArith = isIntegral || isFloat };
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
};
}}}
#endif // __OPENCV_GPU_TYPE_TRAITS_HPP__

View File

@@ -0,0 +1,237 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_UTILITY_HPP__
#define __OPENCV_GPU_UTILITY_HPP__
#include "saturate_cast.hpp"
#include "datamov_utils.hpp"
#include "detail/reduction_detail.hpp"
namespace cv { namespace gpu { namespace device
{
#define OPENCV_GPU_LOG_WARP_SIZE (5)
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
///////////////////////////////////////////////////////////////////////////////
// swap
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
{
const T temp = a;
a = b;
b = temp;
}
///////////////////////////////////////////////////////////////////////////////
// Mask Reader
struct SingleMask
{
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x] != 0;
}
PtrStepb mask;
};
struct SingleMaskChannels
{
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
: mask(mask_), channels(channels_) {}
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
:mask(mask_.mask), channels(mask_.channels){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x / channels] != 0;
}
PtrStepb mask;
int channels;
};
struct MaskCollection
{
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
: maskCollection(maskCollection_) {}
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
__device__ __forceinline__ void next()
{
curMask = *maskCollection++;
}
__device__ __forceinline__ void setMask(int z)
{
curMask = maskCollection[z];
}
__device__ __forceinline__ bool operator()(int y, int x) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
}
const PtrStepb* maskCollection;
PtrStepb curMask;
};
struct WithOutMask
{
__device__ __forceinline__ WithOutMask(){}
__device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
__device__ __forceinline__ void next() const
{
}
__device__ __forceinline__ void setMask(int) const
{
}
__device__ __forceinline__ bool operator()(int, int) const
{
return true;
}
__device__ __forceinline__ bool operator()(int, int, int) const
{
return true;
}
static __device__ __forceinline__ bool check(int, int)
{
return true;
}
static __device__ __forceinline__ bool check(int, int, int)
{
return true;
}
};
///////////////////////////////////////////////////////////////////////////////
// Reduction
template <int n, typename T, typename Op> __device__ __forceinline__ void reduce(volatile T* data, T& partial_reduction, int tid, const Op& op)
{
StaticAssert<n >= 8 && n <= 512>::check();
utility_detail::ReductionDispatcher<n <= 64>::reduce<n>(data, partial_reduction, tid, op);
}
template <int n, typename T, typename V, typename Pred>
__device__ __forceinline__ void reducePredVal(volatile T* sdata, T& myData, V* sval, V& myVal, int tid, const Pred& pred)
{
StaticAssert<n >= 8 && n <= 512>::check();
utility_detail::PredValReductionDispatcher<n <= 64>::reduce<n>(myData, myVal, sdata, sval, tid, pred);
}
template <int n, typename T, typename V1, typename V2, typename Pred>
__device__ __forceinline__ void reducePredVal2(volatile T* sdata, T& myData, V1* sval1, V1& myVal1, V2* sval2, V2& myVal2, int tid, const Pred& pred)
{
StaticAssert<n >= 8 && n <= 512>::check();
utility_detail::PredVal2ReductionDispatcher<n <= 64>::reduce<n>(myData, myVal1, myVal2, sdata, sval1, sval2, tid, pred);
}
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
// solve 2x2 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
return true;
}
return false;
}
// solve 3x3 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet *
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
x[1] = saturate_cast<T>(invdet *
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
x[2] = saturate_cast<T>(invdet *
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
return true;
}
return false;
}
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_UTILITY_HPP__

View File

@@ -0,0 +1,224 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__
#define __OPENCV_GPU_VEC_DISTANCE_HPP__
#include "utility.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"
namespace cv { namespace gpu { namespace device
{
template <typename T> struct L1Dist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ L1Dist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
template <> struct L1Dist<float>
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
mySum += ::fabs(val1 - val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
}
__device__ __forceinline__ operator float() const
{
return mySum;
}
float mySum;
};
struct L2Dist
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile float>());
}
__device__ __forceinline__ operator float() const
{
return sqrtf(mySum);
}
float mySum;
};
struct HammingDist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ HammingDist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<volatile int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
for (int i = tid; i < len; i += THREAD_DIM)
{
T1 val1;
ForceGlob<T1>::Load(vec1, i, val1);
T2 val2;
ForceGlob<T2>::Load(vec2, i, val2);
dist.reduceIter(val1, val2);
}
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
{
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
{
vec1 = vec1_;
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
}
const T1* vec1;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
{
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
{
if (glob_tid < len)
smem[glob_tid] = vec1[glob_tid];
__syncthreads();
U* vec1ValsPtr = vec1Vals;
#pragma unroll
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
*vec1ValsPtr++ = smem[i];
__syncthreads();
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
}
U vec1Vals[MAX_LEN / THREAD_DIM];
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__

View File

@@ -0,0 +1,330 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "functional.hpp"
namespace cv { namespace gpu { namespace device
{
namespace vec_math_detail
{
template <int cn, typename VecD> struct SatCastHelper;
template <typename VecD> struct SatCastHelper<1, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
}
};
template <typename VecD> struct SatCastHelper<2, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
}
};
template <typename VecD> struct SatCastHelper<3, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
}
};
template <typename VecD> struct SatCastHelper<4, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
};
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v)
{
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
__device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
}
namespace vec_math_detail
{
template <typename T1, typename T2> struct BinOpTraits
{
typedef int argument_type;
};
template <typename T> struct BinOpTraits<T, T>
{
typedef T argument_type;
};
template <typename T> struct BinOpTraits<T, double>
{
typedef double argument_type;
};
template <typename T> struct BinOpTraits<double, T>
{
typedef double argument_type;
};
template <> struct BinOpTraits<double, double>
{
typedef double argument_type;
};
template <typename T> struct BinOpTraits<T, float>
{
typedef float argument_type;
};
template <typename T> struct BinOpTraits<float, T>
{
typedef float argument_type;
};
template <> struct BinOpTraits<float, float>
{
typedef float argument_type;
};
template <> struct BinOpTraits<double, float>
{
typedef double argument_type;
};
template <> struct BinOpTraits<float, double>
{
typedef double argument_type;
};
}
#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
__device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
} \
__device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
{ \
func<type> f; \
return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
{ \
func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
} \
template <typename T> \
__device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
{ \
func<typename vec_math_detail::BinOpTraits<T, type>::argument_type> f; \
return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
}
#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, fabs, fabs_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)
#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
OPENCV_GPU_IMPLEMENT_VEC_OP(float)
OPENCV_GPU_IMPLEMENT_VEC_OP(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
#undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
#undef OPENCV_GPU_IMPLEMENT_VEC_OP
#undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VECMATH_HPP__

View File

@@ -0,0 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__
#define __OPENCV_GPU_VEC_TRAITS_HPP__
#include "common.hpp"
namespace cv { namespace gpu { namespace device
{
template<typename T, int N> struct TypeVec;
struct __align__(8) uchar8
{
uchar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
{
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(8) char8
{
schar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
{
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) ushort8
{
ushort a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
{
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) short8
{
short a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
{
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) uint8
{
uint a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
{
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) int8
{
int a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) float8
{
float a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct double8
{
double a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)
#undef OPENCV_GPU_IMPLEMENT_TYPE_VEC
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
template<typename T> struct VecTraits;
#define OPENCV_GPU_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
typedef type elem_type; \
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
template<> struct VecTraits<char>
{
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
typedef schar elem_type;
enum {cn=2};
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
typedef schar elem_type;
enum {cn=3};
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
typedef schar elem_type;
enum {cn=4};
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__

View File

@@ -0,0 +1,112 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
#define __OPENCV_GPU_DEVICE_WARP_HPP__
namespace cv { namespace gpu { namespace device
{
struct Warp
{
enum
{
LOG_WARP_SIZE = 5,
WARP_SIZE = 1 << LOG_WARP_SIZE,
STRIDE = WARP_SIZE
};
/** \brief Returns the warp lane ID of the calling thread. */
static __device__ __forceinline__ unsigned int laneId()
{
unsigned int ret;
asm("mov.u32 %0, %laneid;" : "=r"(ret) );
return ret;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
for(It t = beg + laneId(); t < end; t += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = *t;
return out;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = op(*t);
return out;
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
unsigned int lane = laneId();
InIt1 t1 = beg1 + lane;
InIt2 t2 = beg2 + lane;
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
*out = op(*t1, *t2);
return out;
}
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
unsigned int lane = laneId();
value += lane;
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
};
}}} // namespace cv { namespace gpu { namespace device
#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */

View File

@@ -0,0 +1,69 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or bpied warranties, including, but not limited to, the bpied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
#define OPENCV_GPU_WARP_REDUCE_HPP__
namespace cv { namespace gpu { namespace device
{
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16)
{
T partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
}}} // namespace cv { namespace gpu { namespace device {
#endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */